diff options
Diffstat (limited to 'arch')
868 files changed, 8671 insertions, 4582 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..d9c4f1ef885c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -394,10 +394,10 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE config HAVE_RCU_TABLE_FREE bool -config HAVE_RCU_TABLE_NO_INVALIDATE +config HAVE_MMU_GATHER_PAGE_SIZE bool -config HAVE_MMU_GATHER_PAGE_SIZE +config MMU_GATHER_NO_RANGE bool config HAVE_MMU_GATHER_NO_GATHER diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index ccf9d65166bb..ead9c7acb643 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -327,14 +327,18 @@ static inline int __is_mmio(const volatile void __iomem *addr) #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) extern inline unsigned int ioread8(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); mb(); return ret; } extern inline unsigned int ioread16(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); mb(); return ret; } @@ -375,7 +379,9 @@ extern inline void outw(u16 b, unsigned long port) #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) extern inline unsigned int ioread32(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); mb(); return ret; } @@ -420,14 +426,18 @@ extern inline void __raw_writew(u16 b, volatile void __iomem *addr) extern inline u8 readb(const volatile void __iomem *addr) { - u8 ret = __raw_readb(addr); + u8 ret; + mb(); + ret = __raw_readb(addr); mb(); return ret; } extern inline u16 readw(const volatile void __iomem *addr) { - u16 ret = __raw_readw(addr); + u16 ret; + mb(); + ret = __raw_readw(addr); mb(); return ret; } @@ -468,14 +478,18 @@ extern inline void __raw_writeq(u64 b, volatile void __iomem *addr) extern inline u32 readl(const volatile void __iomem *addr) { - u32 ret = __raw_readl(addr); + u32 ret; + mb(); + ret = __raw_readl(addr); mb(); return ret; } extern inline u64 readq(const volatile void __iomem *addr) { - u64 ret = __raw_readq(addr); + u64 ret; + mb(); + ret = __raw_readq(addr); mb(); return ret; } @@ -504,14 +518,44 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define outb_p outb #define outw_p outw #define outl_p outl -#define readb_relaxed(addr) __raw_readb(addr) -#define readw_relaxed(addr) __raw_readw(addr) -#define readl_relaxed(addr) __raw_readl(addr) -#define readq_relaxed(addr) __raw_readq(addr) -#define writeb_relaxed(b, addr) __raw_writeb(b, addr) -#define writew_relaxed(b, addr) __raw_writew(b, addr) -#define writel_relaxed(b, addr) __raw_writel(b, addr) -#define writeq_relaxed(b, addr) __raw_writeq(b, addr) + +extern u8 readb_relaxed(const volatile void __iomem *addr); +extern u16 readw_relaxed(const volatile void __iomem *addr); +extern u32 readl_relaxed(const volatile void __iomem *addr); +extern u64 readq_relaxed(const volatile void __iomem *addr); + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +extern inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readb(addr); +} + +extern inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readw(addr); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +extern inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readl(addr); +} + +extern inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readq(addr); +} +#endif + +#define writeb_relaxed writeb +#define writew_relaxed writew +#define writel_relaxed writel +#define writeq_relaxed writeq /* * String version of IO memory access ops: diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index c025a3e5e357..938de13adfbf 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -16,21 +16,27 @@ unsigned int ioread8(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); mb(); return ret; } unsigned int ioread16(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); mb(); return ret; } unsigned int ioread32(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); mb(); return ret; } @@ -148,28 +154,36 @@ EXPORT_SYMBOL(__raw_writeq); u8 readb(const volatile void __iomem *addr) { - u8 ret = __raw_readb(addr); + u8 ret; + mb(); + ret = __raw_readb(addr); mb(); return ret; } u16 readw(const volatile void __iomem *addr) { - u16 ret = __raw_readw(addr); + u16 ret; + mb(); + ret = __raw_readw(addr); mb(); return ret; } u32 readl(const volatile void __iomem *addr) { - u32 ret = __raw_readl(addr); + u32 ret; + mb(); + ret = __raw_readl(addr); mb(); return ret; } u64 readq(const volatile void __iomem *addr) { - u64 ret = __raw_readq(addr); + u64 ret; + mb(); + ret = __raw_readq(addr); mb(); return ret; } @@ -207,6 +221,38 @@ EXPORT_SYMBOL(writew); EXPORT_SYMBOL(writel); EXPORT_SYMBOL(writeq); +/* + * The _relaxed functions must be ordered w.r.t. each other, but they don't + * have to be ordered w.r.t. other memory accesses. + */ +u8 readb_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readb(addr); +} + +u16 readw_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readw(addr); +} + +u32 readl_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readl(addr); +} + +u64 readq_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readq(addr); +} + +EXPORT_SYMBOL(readb_relaxed); +EXPORT_SYMBOL(readw_relaxed); +EXPORT_SYMBOL(readl_relaxed); +EXPORT_SYMBOL(readq_relaxed); /* * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 08bcfed6b80f..134cc223ea81 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -77,6 +77,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index c77a0e3671ac..0284ace0e1ab 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -19,7 +19,7 @@ #define R_ARC_32_PCREL 0x31 /*to set parameters in the core dumps */ -#define ELF_ARCH EM_ARCOMPACT +#define ELF_ARCH EM_ARC_INUSE #define ELF_CLASS ELFCLASS32 #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 54f5ec5c1759..619866599bff 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -11,6 +11,8 @@ #ifdef __ASSEMBLY__ #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index a2bfacbcfce1..4bd108d1a14a 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -153,7 +153,6 @@ END(EV_Extension) tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed - lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address @@ -196,15 +195,9 @@ tracesys_exit: ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: - - ; stop_pc info by gdb needs this info - lr r0, [efa] + mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc mov r1, sp - ; Now that we have read EFA, it is safe to do "fake" rtie - ; and get out of CPU exception mode - FAKE_RET_FROM_EXCPN - ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File ; NOTE: clobbers r12 @@ -231,6 +224,10 @@ ENTRY(EV_Trap) EXCEPTION_PROLOGUE + lr r12, [efa] + + FAKE_RET_FROM_EXCPN + ;============ TRAP 1 :breakpoints ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR) bmsk.f 0, r9, 7 @@ -238,9 +235,6 @@ ENTRY(EV_Trap) ;============ TRAP (no param): syscall top level - ; First return from Exception to pure K mode (Exception/IRQs renabled) - FAKE_RET_FROM_EXCPN - ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 861a8aea51f9..661fd842ea97 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -614,8 +614,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev) /* loop thru all available h/w condition indexes */ for (i = 0; i < cc_bcr.c; i++) { write_aux_reg(ARC_REG_CC_INDEX, i); - cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); - cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); + cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0)); + cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1)); arc_pmu_map_hw_event(i, cc_name.str); arc_pmu_add_raw_event_attr(i, cc_name.str); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ee89dc61f6e..23dc002aa574 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -12,6 +12,7 @@ #include <linux/clocksource.h> #include <linux/console.h> #include <linux/module.h> +#include <linux/sizes.h> #include <linux/cpu.h> #include <linux/of_fdt.h> #include <linux/of.h> @@ -409,12 +410,12 @@ static void arc_chk_core_config(void) if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr) panic("Linux built with incorrect DCCM Base address\n"); - if (CONFIG_ARC_DCCM_SZ != cpu->dccm.sz) + if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz) panic("Linux built with incorrect DCCM Size\n"); #endif #ifdef CONFIG_ARC_HAS_ICCM - if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz) + if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz) panic("Linux built with incorrect ICCM Size\n"); #endif diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 2eaecfb063a7..ea52d05e1922 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -6,8 +6,9 @@ menuconfig ARC_PLAT_EZNPS bool "\"EZchip\" ARC dev platform" + depends on ISA_ARCOMPACT select CPU_BIG_ENDIAN - select CLKSRC_NPS + select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6029d324911c..b0eadcba4bd2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -67,7 +67,7 @@ config ARM select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT - select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE @@ -1981,7 +1981,7 @@ config XIP_DEFLATED_DATA config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1a63e497493c..d81ebbbbd7ad 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -310,13 +310,15 @@ endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare stack_protector_prepare: prepare0 - $(eval KBUILD_CFLAGS += \ + $(eval SSP_PLUGIN_CFLAGS := \ -fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \ awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\ include/generated/asm-offsets.h) \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\ include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) + $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) endif all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9219389bbe61..1483966dcf23 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -101,7 +101,6 @@ clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \ $(libfdt) $(libfdt_hdrs) hyp-stub.S KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -117,7 +116,8 @@ CFLAGS_fdt_ro.o := $(nossp_flags) CFLAGS_fdt_rw.o := $(nossp_flags) CFLAGS_fdt_wip.o := $(nossp_flags) -ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \ + -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index b36c0289a308..6a0f1f524466 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -2,11 +2,13 @@ #ifndef _ARM_LIBFDT_ENV_H #define _ARM_LIBFDT_ENV_H +#include <linux/limits.h> #include <linux/types.h> #include <linux/string.h> #include <asm/byteorder.h> -#define INT_MAX ((int)(~0U>>1)) +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX typedef __be16 fdt16_t; typedef __be32 fdt32_t; diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index fc7ed03d8b93..51b078604978 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -43,7 +43,7 @@ SECTIONS } .table : ALIGN(4) { _table_start = .; - LONG(ZIMAGE_MAGIC(2)) + LONG(ZIMAGE_MAGIC(4)) LONG(ZIMAGE_MAGIC(0x5a534c4b)) LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start)) LONG(ZIMAGE_MAGIC(_kernel_bss_size)) diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 7ad079861efd..91f93bc89716 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -131,6 +131,11 @@ }; / { + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 MB */ + }; + clk_mcasp0_fixed: clk_mcasp0_fixed { #clock-cells = <0>; compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/am335x-pocketbeagle.dts b/arch/arm/boot/dts/am335x-pocketbeagle.dts index ff4f919d22f6..abf2badce53d 100644 --- a/arch/arm/boot/dts/am335x-pocketbeagle.dts +++ b/arch/arm/boot/dts/am335x-pocketbeagle.dts @@ -88,7 +88,6 @@ AM33XX_PADCONF(AM335X_PIN_MMC0_DAT3, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CMD, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CLK, PIN_INPUT_PULLUP, MUX_MODE0) - AM33XX_PADCONF(AM335X_PIN_MCASP0_ACLKR, PIN_INPUT, MUX_MODE4) /* (B12) mcasp0_aclkr.mmc0_sdwp */ >; }; diff --git a/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts index 8678e6e35493..e5fdb7abb0d5 100644 --- a/arch/arm/boot/dts/am335x-sancloud-bbe.dts +++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts @@ -108,7 +108,7 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii-txid"; + phy-mode = "rgmii-id"; }; &i2c0 { diff --git a/arch/arm/boot/dts/am3874-iceboard.dts b/arch/arm/boot/dts/am3874-iceboard.dts index 883fb85135d4..1b4b2b0500e4 100644 --- a/arch/arm/boot/dts/am3874-iceboard.dts +++ b/arch/arm/boot/dts/am3874-iceboard.dts @@ -111,13 +111,13 @@ reg = <0x70>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; i2c@0 { /* FMC A */ #address-cells = <1>; #size-cells = <0>; reg = <0>; - i2c-mux-idle-disconnect; }; i2c@1 { @@ -125,7 +125,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <1>; - i2c-mux-idle-disconnect; }; i2c@2 { @@ -133,7 +132,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <2>; - i2c-mux-idle-disconnect; }; i2c@3 { @@ -141,7 +139,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <3>; - i2c-mux-idle-disconnect; }; i2c@4 { @@ -149,14 +146,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <4>; - i2c-mux-idle-disconnect; }; i2c@5 { #address-cells = <1>; #size-cells = <0>; reg = <5>; - i2c-mux-idle-disconnect; ina230@40 { compatible = "ti,ina230"; reg = <0x40>; shunt-resistor = <5000>; }; ina230@41 { compatible = "ti,ina230"; reg = <0x41>; shunt-resistor = <5000>; }; @@ -182,14 +177,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <6>; - i2c-mux-idle-disconnect; }; i2c@7 { #address-cells = <1>; #size-cells = <0>; reg = <7>; - i2c-mux-idle-disconnect; u41: pca9575@20 { compatible = "nxp,pca9575"; diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index cae4500194fe..811c8cae315b 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -86,7 +86,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; backlight = <&lcd_bl>; diff --git a/arch/arm/boot/dts/am437x-idk-evm.dts b/arch/arm/boot/dts/am437x-idk-evm.dts index f3ced6df0c9b..9f66f96d09c9 100644 --- a/arch/arm/boot/dts/am437x-idk-evm.dts +++ b/arch/arm/boot/dts/am437x-idk-evm.dts @@ -526,11 +526,11 @@ * Supply voltage supervisor on board will not allow opp50 so * disable it and set opp100 as suspend OPP. */ - opp50@300000000 { + opp50-300000000 { status = "disabled"; }; - opp100@600000000 { + opp100-600000000 { opp-suspend; }; }; diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 04bee4ff9dcb..53017a1e5ab0 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1578,8 +1578,9 @@ reg-names = "rev"; ti,hwmods = "d_can0"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>; - clock-names = "fck"; + clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>, + <&dcan0_fck>; + clock-names = "fck", "osc"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xcc000 0x2000>; @@ -1587,6 +1588,8 @@ dcan0: can@0 { compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; + clocks = <&dcan0_fck>; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 0>; interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; @@ -1599,8 +1602,9 @@ reg-names = "rev"; ti,hwmods = "d_can1"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>; - clock-names = "fck"; + clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>, + <&dcan1_fck>; + clock-names = "fck", "osc"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xd0000 0x2000>; @@ -1608,6 +1612,8 @@ dcan1: can@0 { compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; + clocks = <&dcan1_fck>; + clock-name = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 95314121d111..a6fbc088daa8 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -42,7 +42,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; backlight = <&lcd_bl>; @@ -848,6 +848,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&spi0_pins_default>; pinctrl-1 = <&spi0_pins_sleep>; + ti,pindir-d0-out-d1-in = <1>; }; &spi1 { @@ -855,6 +856,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&spi1_pins_default>; pinctrl-1 = <&spi1_pins_sleep>; + ti,pindir-d0-out-d1-in = <1>; }; &usb2_phy1 { diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi index 091356f2a8c1..c726cd8dbdf1 100644 --- a/arch/arm/boot/dts/am43xx-clocks.dtsi +++ b/arch/arm/boot/dts/am43xx-clocks.dtsi @@ -704,6 +704,60 @@ ti,bit-shift = <8>; reg = <0x2a48>; }; + + clkout1_osc_div_ck: clkout1-osc-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&sys_clkin_ck>; + ti,bit-shift = <20>; + ti,max-div = <4>; + reg = <0x4100>; + }; + + clkout1_src2_mux_ck: clkout1-src2-mux-ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&clk_rc32k_ck>, <&sysclk_div>, <&dpll_ddr_m2_ck>, + <&dpll_per_m2_ck>, <&dpll_disp_m2_ck>, + <&dpll_mpu_m2_ck>; + reg = <0x4100>; + }; + + clkout1_src2_pre_div_ck: clkout1-src2-pre-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&clkout1_src2_mux_ck>; + ti,bit-shift = <4>; + ti,max-div = <8>; + reg = <0x4100>; + }; + + clkout1_src2_post_div_ck: clkout1-src2-post-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&clkout1_src2_pre_div_ck>; + ti,bit-shift = <8>; + ti,max-div = <32>; + ti,index-power-of-two; + reg = <0x4100>; + }; + + clkout1_mux_ck: clkout1-mux-ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&clkout1_osc_div_ck>, <&clk_rc32k_ck>, + <&clkout1_src2_post_div_ck>, <&dpll_extdev_m2_ck>; + ti,bit-shift = <16>; + reg = <0x4100>; + }; + + clkout1_ck: clkout1-ck { + #clock-cells = <0>; + compatible = "ti,gate-clock"; + clocks = <&clkout1_mux_ck>; + ti,bit-shift = <23>; + reg = <0x4100>; + }; }; &prcm { diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 0aaacea1d887..10105a497c1a 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -167,11 +167,7 @@ &pcie1_rc { status = "okay"; - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; -}; - -&pcie1_ep { - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>; }; &mmc1 { diff --git a/arch/arm/boot/dts/am572x-idk-common.dtsi b/arch/arm/boot/dts/am572x-idk-common.dtsi index a064f13b3880..ddf123620e96 100644 --- a/arch/arm/boot/dts/am572x-idk-common.dtsi +++ b/arch/arm/boot/dts/am572x-idk-common.dtsi @@ -147,10 +147,6 @@ gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; }; -&pcie1_ep { - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; -}; - &mailbox5 { status = "okay"; mbox_ipu1_ipc3x: mbox_ipu1_ipc3x { diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index bc76f1705c0f..a813a0cf3ff3 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -29,6 +29,27 @@ reg = <0x0 0x80000000 0x0 0x80000000>; }; + main_12v0: fixedregulator-main_12v0 { + /* main supply */ + compatible = "regulator-fixed"; + regulator-name = "main_12v0"; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + regulator-always-on; + regulator-boot-on; + }; + + evm_5v0: fixedregulator-evm_5v0 { + /* Output of TPS54531D */ + compatible = "regulator-fixed"; + regulator-name = "evm_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&main_12v0>; + regulator-always-on; + regulator-boot-on; + }; + vdd_3v3: fixedregulator-vdd_3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; @@ -547,10 +568,6 @@ gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; }; -&pcie1_ep { - gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; -}; - &mcasp3 { #sound-dai-cells = <0>; assigned-clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>; diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 3f4bb44d85f0..669da3a33d82 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -339,7 +339,8 @@ comphy: phy@18300 { compatible = "marvell,armada-380-comphy"; - reg = <0x18300 0x100>; + reg-names = "comphy", "conf"; + reg = <0x18300 0x100>, <0x18460 4>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index ba7f3e646c26..b8db77b7f5d8 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -40,7 +40,7 @@ ahb { usb0: gadget@300000 { - atmel,vbus-gpio = <&pioA PIN_PA27 GPIO_ACTIVE_HIGH>; + atmel,vbus-gpio = <&pioA PIN_PB11 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usba_vbus>; status = "okay"; @@ -125,8 +125,6 @@ bus-width = <8>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sdmmc0_default>; - non-removable; - mmc-ddr-1_8v; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index dee9c0c8a096..16c6fd3c4246 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -187,7 +187,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - <AT91_PIOB 4 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 4 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 5 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -221,7 +221,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - <AT91_PIOB 6 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 6 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 7 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -239,7 +239,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - <AT91_PIOB 8 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 8 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 9 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -257,7 +257,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - <AT91_PIOB 10 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 10 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 11 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -275,7 +275,7 @@ uart0 { pinctrl_uart0: uart0-0 { atmel,pins = - <AT91_PIOA 31 AT91_PERIPH_B AT91_PINCTRL_NONE + <AT91_PIOA 31 AT91_PERIPH_B AT91_PINCTRL_PULL_UP AT91_PIOA 30 AT91_PERIPH_B AT91_PINCTRL_PULL_UP>; }; }; @@ -283,7 +283,7 @@ uart1 { pinctrl_uart1: uart1-0 { atmel,pins = - <AT91_PIOB 12 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 12 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 13 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; }; diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index dba025a98527..5ed3d745ac86 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -329,7 +329,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - <AT91_PIOC 8 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOC 8 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOC 9 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -347,7 +347,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - <AT91_PIOC 12 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOC 12 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOC 13 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -365,7 +365,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - <AT91_PIOC 14 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOC 14 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOC 15 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 99678abdda93..5c990cfae254 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -183,7 +183,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - <AT91_PIOA 26 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOA 26 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOA 27 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -201,7 +201,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - <AT91_PIOD 0 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOD 0 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOD 1 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -219,7 +219,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - <AT91_PIOD 2 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOD 2 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOD 3 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 9483609a2105..53a929b7e4a6 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -556,7 +556,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - <AT91_PIOB 19 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 19 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 18 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -574,7 +574,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - <AT91_PIOB 4 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 4 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 5 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -592,7 +592,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - <AT91_PIOB 6 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 6 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 7 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -610,7 +610,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - <AT91_PIOB 8 AT91_PERIPH_A AT91_PINCTRL_NONE + <AT91_PIOB 8 AT91_PERIPH_A AT91_PINCTRL_PULL_UP AT91_PIOB 9 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi index e2d38ce43442..f0e1f6ec3600 100644 --- a/arch/arm/boot/dts/at91sam9rl.dtsi +++ b/arch/arm/boot/dts/at91sam9rl.dtsi @@ -682,7 +682,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - <AT91_PIOA 6 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOA 6 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOA 7 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -721,7 +721,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - <AT91_PIOA 11 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOA 11 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOA 12 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -744,7 +744,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - <AT91_PIOA 13 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOA 13 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOA 14 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; @@ -767,7 +767,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - <AT91_PIOB 0 AT91_PERIPH_A AT91_PINCTRL_NONE>, + <AT91_PIOB 0 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>, <AT91_PIOB 1 AT91_PERIPH_A AT91_PINCTRL_PULL_UP>; }; diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 5f7b46503a51..9cfe5096ebb8 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -174,8 +174,8 @@ mdio: mdio@18002000 { compatible = "brcm,iproc-mdio"; reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; status = "disabled"; gphy0: ethernet-phy@0 { diff --git a/arch/arm/boot/dts/bcm-hr2.dtsi b/arch/arm/boot/dts/bcm-hr2.dtsi index e4d49731287f..e35398cc60a0 100644 --- a/arch/arm/boot/dts/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/bcm-hr2.dtsi @@ -75,7 +75,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&periph_clk>; }; @@ -83,7 +83,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | - IRQ_TYPE_LEVEL_HIGH)>; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; @@ -91,7 +91,7 @@ compatible = "arm,cortex-a9-twd-wdt"; reg = <0x20620 0x20>; interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | - IRQ_TYPE_LEVEL_HIGH)>; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 6925b30c2253..ff3d4eaf1e7b 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -257,10 +257,10 @@ status = "disabled"; }; - mailbox: mailbox@25000 { + mailbox: mailbox@25c00 { compatible = "brcm,iproc-fa2-mbox"; - reg = <0x25000 0x445>; - interrupts = <GIC_SPI 150 IRQ_TYPE_LEVEL_HIGH>; + reg = <0x25c00 0x400>; + interrupts = <GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH>; #mbox-cells = <1>; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index ba0167df6c5f..f0c504c024d0 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -19,7 +19,7 @@ leds { act { - gpios = <&gpio 47 GPIO_ACTIVE_HIGH>; + gpios = <&gpio 47 GPIO_ACTIVE_LOW>; }; }; @@ -105,6 +105,7 @@ &sdhci { #address-cells = <1>; #size-cells = <0>; + pinctrl-names = "default"; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; mmc-pwrseq = <&wifi_pwrseq>; non-removable; diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi index 81399b2c5af9..d4f0e455612d 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi @@ -8,6 +8,14 @@ reg = <0 0x40000000>; }; + leds { + /* + * Since there is no upstream GPIO driver yet, + * remove the incomplete node. + */ + /delete-node/ act; + }; + reg_3v3: fixed-regulator { compatible = "regulator-fixed"; regulator-name = "3V3"; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 9777644c6c2b..5e2716678856 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -40,7 +40,7 @@ trips { cpu-crit { - temperature = <80000>; + temperature = <90000>; hysteresis = <0>; type = "critical"; }; @@ -486,6 +486,7 @@ "dsi0_ddr2", "dsi0_ddr"; + status = "disabled"; }; thermal: thermal@7e212000 { diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index ac5266ee8d4c..dc6065de1d69 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -353,8 +353,8 @@ mdio: mdio@18003000 { compatible = "brcm,iproc-mdio"; reg = <0x18003000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; }; mdio-bus-mux { diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index 3931fb068ff0..91d1018ab75f 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -24,12 +24,12 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &davinci_mdio { diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 9e43d5ec0bb2..79ccdd4470f4 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -33,12 +33,12 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &davinci_mdio { diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts index 861ab90a3f3a..c16e183822be 100644 --- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts +++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts @@ -24,12 +24,12 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &davinci_mdio { diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 099d6fe2a57a..bb62afecf779 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3450,6 +3450,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER13_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; @@ -3478,6 +3479,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; @@ -3506,6 +3508,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; @@ -3534,6 +3537,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; clock-names = "fck"; interrupts = <GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>; + ti,timer-pwm; }; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 953f0ffce2a9..a6ef3d137c7a 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -148,6 +148,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2"; reg = <0x0 0x44000000 0x0 0x1000000>, <0x0 0x45000000 0x0 0x1000>; @@ -171,6 +172,7 @@ #address-cells = <1>; ranges = <0x51000000 0x51000000 0x3000 0x0 0x20000000 0x10000000>; + dma-ranges; /** * To enable PCI endpoint mode, disable the pcie1_rc * node and enable pcie1_ep mode. @@ -228,6 +230,7 @@ #address-cells = <1>; ranges = <0x51800000 0x51800000 0x3000 0x0 0x30000000 0x10000000>; + dma-ranges; status = "disabled"; pcie2_rc: pcie@51800000 { reg = <0x51800000 0x2000>, <0x51802000 0x14c>, <0x1000 0x2000>; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index cdcba3f561c4..9f6fbe4c1fee 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -86,3 +86,8 @@ &usb4_tm { status = "disabled"; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 93e1eb83bed9..d7d98d2069df 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -796,16 +796,6 @@ clock-div = <1>; }; - ipu1_gfclk_mux: ipu1_gfclk_mux@520 { - #clock-cells = <0>; - compatible = "ti,mux-clock"; - clocks = <&dpll_abe_m2x2_ck>, <&dpll_core_h22x2_ck>; - ti,bit-shift = <24>; - reg = <0x0520>; - assigned-clocks = <&ipu1_gfclk_mux>; - assigned-clock-parents = <&dpll_core_h22x2_ck>; - }; - dummy_ck: dummy_ck { #clock-cells = <0>; compatible = "fixed-clock"; @@ -1564,6 +1554,8 @@ compatible = "ti,clkctrl"; reg = <0x20 0x4>; #clock-cells = <2>; + assigned-clocks = <&ipu1_clkctrl DRA7_IPU1_MMU_IPU1_CLKCTRL 24>; + assigned-clock-parents = <&dpll_core_h22x2_ck>; }; ipu_clkctrl: ipu-clkctrl@50 { diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts index bf092e97e14f..60f235d7f277 100644 --- a/arch/arm/boot/dts/exynos4210-universal_c210.dts +++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts @@ -115,7 +115,7 @@ gpio-sck = <&gpy3 1 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpy3 3 GPIO_ACTIVE_HIGH>; num-chipselects = <1>; - cs-gpios = <&gpy4 3 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpy4 3 GPIO_ACTIVE_LOW>; lcd@0 { compatible = "samsung,ld9040"; @@ -124,8 +124,6 @@ vci-supply = <&ldo17_reg>; reset-gpios = <&gpy4 5 GPIO_ACTIVE_HIGH>; spi-max-frequency = <1200000>; - spi-cpol; - spi-cpha; power-on-delay = <10>; reset-delay = <10>; panel-width-mm = <90>; diff --git a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi index 30eee5942eff..945ee8f55e65 100644 --- a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi +++ b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi @@ -50,7 +50,7 @@ i2c_cm36651: i2c-gpio-2 { compatible = "i2c-gpio"; - gpios = <&gpf0 0 GPIO_ACTIVE_LOW>, <&gpf0 1 GPIO_ACTIVE_LOW>; + gpios = <&gpf0 0 GPIO_ACTIVE_HIGH>, <&gpf0 1 GPIO_ACTIVE_HIGH>; i2c-gpio,delay-us = <2>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index bfaa2de63a10..e2030ba16512 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -72,7 +72,6 @@ reg = <0>; /* 50 ns min period = 20 MHz */ spi-max-frequency = <20000000>; - spi-cpol; /* Clock active low */ vcc-supply = <&vdisp>; iovcc-supply = <&vdisp>; vci-supply = <&vdisp>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts index 5e5e282ed30e..d3681aa2a56a 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts @@ -75,8 +75,8 @@ imx27-phycard-s-rdk { pinctrl_i2c1: i2c1grp { fsl,pins = < - MX27_PAD_I2C2_SDA__I2C2_SDA 0x0 - MX27_PAD_I2C2_SCL__I2C2_SCL 0x0 + MX27_PAD_I2C_DATA__I2C_DATA 0x0 + MX27_PAD_I2C_CLK__I2C_CLK 0x0 >; }; diff --git a/arch/arm/boot/dts/imx6-logicpd-som.dtsi b/arch/arm/boot/dts/imx6-logicpd-som.dtsi index 7ceae3573248..547fb141ec0c 100644 --- a/arch/arm/boot/dts/imx6-logicpd-som.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-som.dtsi @@ -207,6 +207,10 @@ vin-supply = <&sw1c_reg>; }; +&snvs_poweroff { + status = "okay"; +}; + &iomuxc { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_hog>; diff --git a/arch/arm/boot/dts/imx6dl-icore-mipi.dts b/arch/arm/boot/dts/imx6dl-icore-mipi.dts index e43bccb78ab2..d8f3821a0ffd 100644 --- a/arch/arm/boot/dts/imx6dl-icore-mipi.dts +++ b/arch/arm/boot/dts/imx6dl-icore-mipi.dts @@ -8,7 +8,7 @@ /dts-v1/; #include "imx6dl.dtsi" -#include "imx6qdl-icore.dtsi" +#include "imx6qdl-icore-1.5.dtsi" / { model = "Engicam i.CoreM6 DualLite/Solo MIPI Starter Kit"; diff --git a/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts b/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts index 0d594e4bd559..a1173bf5bff5 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts +++ b/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts @@ -38,7 +38,7 @@ }; &switch_ports { - /delete-node/ port@2; + /delete-node/ port@3; }; &touchscreen { diff --git a/arch/arm/boot/dts/imx6q-b450v3.dts b/arch/arm/boot/dts/imx6q-b450v3.dts index 95b8f2d71821..fb0980190aa0 100644 --- a/arch/arm/boot/dts/imx6q-b450v3.dts +++ b/arch/arm/boot/dts/imx6q-b450v3.dts @@ -65,13 +65,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>, - <&clks IMX6QDL_CLK_PLL3_USB_OTG>; -}; - &ldb { status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-b650v3.dts b/arch/arm/boot/dts/imx6q-b650v3.dts index 611cb7ae7e55..8f762d9c5ae9 100644 --- a/arch/arm/boot/dts/imx6q-b650v3.dts +++ b/arch/arm/boot/dts/imx6q-b650v3.dts @@ -65,13 +65,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>, - <&clks IMX6QDL_CLK_PLL3_USB_OTG>; -}; - &ldb { status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts index e4cb118f88c6..1ea64ecf4291 100644 --- a/arch/arm/boot/dts/imx6q-b850v3.dts +++ b/arch/arm/boot/dts/imx6q-b850v3.dts @@ -53,17 +53,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>, - <&clks IMX6QDL_CLK_IPU1_DI0_PRE_SEL>, - <&clks IMX6QDL_CLK_IPU2_DI0_PRE_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL2_PFD2_396M>, - <&clks IMX6QDL_CLK_PLL2_PFD2_396M>; -}; - &ldb { fsl,dual-channel; status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi index fa27dcdf06f1..1938b04199c4 100644 --- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi +++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi @@ -377,3 +377,18 @@ #interrupt-cells = <1>; }; }; + +&clks { + assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, + <&clks IMX6QDL_CLK_IPU1_DI0_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU1_DI1_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI0_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI1_PRE_SEL>; + assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>; +}; diff --git a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts index 9c61e3be2d9a..1c46df6827f5 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts +++ b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts @@ -55,7 +55,7 @@ #sound-dai-cells = <0>; clocks = <&clk_ext_audio_codec>; VDDA-supply = <®_3p3v>; - VDDIO-supply = <®_3p3v>; + VDDIO-supply = <&sw2_reg>; }; }; diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi index 387801dde02e..08a2e17e0539 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi +++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi @@ -206,7 +206,7 @@ }; rtc@56 { - compatible = "rv3029c2"; + compatible = "microcrystal,rv3029"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc_hw300>; reg = <0x56>; diff --git a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi index c23ba229fd05..8c33510c9519 100644 --- a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi @@ -105,19 +105,16 @@ sound-digital { compatible = "simple-audio-card"; simple-audio-card,name = "tda1997x-audio"; + simple-audio-card,format = "i2s"; + simple-audio-card,bitclock-master = <&sound_codec>; + simple-audio-card,frame-master = <&sound_codec>; - simple-audio-card,dai-link@0 { - format = "i2s"; - - cpu { - sound-dai = <&ssi2>; - }; + sound_cpu: simple-audio-card,cpu { + sound-dai = <&ssi1>; + }; - codec { - bitclock-master; - frame-master; - sound-dai = <&hdmi_receiver>; - }; + sound_codec: simple-audio-card,codec { + sound-dai = <&hdmi_receiver>; }; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-icore.dtsi b/arch/arm/boot/dts/imx6qdl-icore.dtsi index 7814f1ef0804..fde56f98398d 100644 --- a/arch/arm/boot/dts/imx6qdl-icore.dtsi +++ b/arch/arm/boot/dts/imx6qdl-icore.dtsi @@ -384,7 +384,7 @@ pinctrl_usbotg: usbotggrp { fsl,pins = < - MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x17059 + MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID 0x17059 >; }; @@ -396,6 +396,7 @@ MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17070 MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17070 MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17070 + MX6QDL_PAD_GPIO_1__GPIO1_IO01 0x1b0b0 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi index 6486df3e2942..31fa37d2fe47 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi @@ -107,14 +107,14 @@ regulators { vdd_arm: buck1 { regulator-name = "vdd_arm"; - regulator-min-microvolt = <730000>; + regulator-min-microvolt = <925000>; regulator-max-microvolt = <1380000>; regulator-always-on; }; vdd_soc: buck2 { regulator-name = "vdd_soc"; - regulator-min-microvolt = <730000>; + regulator-min-microvolt = <1150000>; regulator-max-microvolt = <1380000>; regulator-always-on; }; @@ -183,7 +183,6 @@ pinctrl-0 = <&pinctrl_usdhc4>; bus-width = <8>; non-removable; - vmmc-supply = <&vdd_emmc_1p8>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index f3404dd10537..cf628465cd0a 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -230,6 +230,8 @@ accelerometer@1c { compatible = "fsl,mma8451"; reg = <0x1c>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_mma8451_int>; interrupt-parent = <&gpio6>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; }; @@ -628,6 +630,12 @@ >; }; + pinctrl_mma8451_int: mma8451intgrp { + fsl,pins = < + MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0xb0b1 + >; + }; + pinctrl_pwm3: pwm1grp { fsl,pins = < MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1 diff --git a/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi index 44a97ba93a95..352ac585ca6b 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi @@ -153,6 +153,7 @@ bus-width = <4>; keep-power-in-suspend; mmc-pwrseq = <&pwrseq_ti_wifi>; + cap-power-off-card; non-removable; vmmc-supply = <&vcc_3v3>; /* vqmmc-supply = <&nvcc_sd1>; - MMC layer doesn't like it! */ diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 93be00a60c88..a66c4fac6baf 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -627,7 +627,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; no-1-8-v; @@ -640,7 +640,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; no-1-8-v; @@ -774,6 +774,7 @@ &usbh1 { vbus-supply = <®_5p0v_main>; disable-over-current; + maximum-speed = "full-speed"; status = "okay"; }; @@ -1055,7 +1056,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1068,7 +1068,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index b3a77bcf00d5..b6bf9ff93105 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -1040,9 +1040,8 @@ compatible = "fsl,imx6q-fec"; reg = <0x02188000 0x4000>; interrupt-names = "int0", "pps"; - interrupts-extended = - <&intc 0 118 IRQ_TYPE_LEVEL_HIGH>, - <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, + <0 119 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET_REF>; diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 5f51f8e5c1fa..d91f92f944c5 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -77,7 +77,6 @@ }; &fec { - /delete-property/interrupts-extended; interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, <0 119 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index 315044ccd65f..e4719566133c 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -99,7 +99,7 @@ &fec2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet2>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy0>; fsl,magic-packet; status = "okay"; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index f6972deb5e39..865528b134d8 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -213,7 +213,7 @@ &fec2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet2>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy2>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi index d556f7c541ce..49554af90a4e 100644 --- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi +++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi @@ -215,7 +215,7 @@ flash0: n25q256a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "micron,n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; spi-max-frequency = <29000000>; spi-rx-bus-width = <4>; spi-tx-bus-width = <4>; diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index c1ed83131b49..1dcec20328df 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -115,6 +115,18 @@ fsl,magic-packet; }; +&flexcan1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_flexcan1>; + status = "disabled"; +}; + +&flexcan2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_flexcan2>; + status = "disabled"; +}; + &gpmi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpmi_nand>; @@ -319,7 +331,6 @@ assigned-clock-rates = <400000000>; bus-width = <8>; fsl,tuning-step = <2>; - max-frequency = <100000000>; vmmc-supply = <®_module_3v3>; vqmmc-supply = <®_DCDC3>; non-removable; @@ -328,12 +339,11 @@ &iomuxc { pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3 &pinctrl_gpio4>; + pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3 &pinctrl_gpio4 + &pinctrl_gpio7 &pinctrl_usbc_det>; pinctrl_gpio1: gpio1-grp { fsl,pins = < - MX7D_PAD_ENET1_RGMII_RD3__GPIO7_IO3 0x74 /* SODIMM 55 */ - MX7D_PAD_ENET1_RGMII_RD2__GPIO7_IO2 0x74 /* SODIMM 63 */ MX7D_PAD_SAI1_RX_SYNC__GPIO6_IO16 0x14 /* SODIMM 77 */ MX7D_PAD_EPDC_DATA09__GPIO2_IO9 0x14 /* SODIMM 89 */ MX7D_PAD_EPDC_DATA08__GPIO2_IO8 0x74 /* SODIMM 91 */ @@ -414,6 +424,13 @@ >; }; + pinctrl_gpio7: gpio7-grp { /* Alternatively CAN1 */ + fsl,pins = < + MX7D_PAD_ENET1_RGMII_RD3__GPIO7_IO3 0x14 /* SODIMM 55 */ + MX7D_PAD_ENET1_RGMII_RD2__GPIO7_IO2 0x14 /* SODIMM 63 */ + >; + }; + pinctrl_i2c1_int: i2c1-int-grp { /* PMIC / TOUCH */ fsl,pins = < MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x79 @@ -428,7 +445,6 @@ pinctrl_enet1: enet1grp { fsl,pins = < - MX7D_PAD_ENET1_CRS__GPIO7_IO14 0x14 MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x73 MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x73 MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x73 @@ -457,10 +473,17 @@ >; }; + pinctrl_flexcan1: flexcan1-grp { + fsl,pins = < + MX7D_PAD_ENET1_RGMII_RD3__FLEXCAN1_TX 0x79 /* SODIMM 55 */ + MX7D_PAD_ENET1_RGMII_RD2__FLEXCAN1_RX 0x79 /* SODIMM 63 */ + >; + }; + pinctrl_flexcan2: flexcan2-grp { fsl,pins = < - MX7D_PAD_GPIO1_IO14__FLEXCAN2_RX 0x59 - MX7D_PAD_GPIO1_IO15__FLEXCAN2_TX 0x59 + MX7D_PAD_GPIO1_IO14__FLEXCAN2_RX 0x79 /* SODIMM 188 */ + MX7D_PAD_GPIO1_IO15__FLEXCAN2_TX 0x79 /* SODIMM 178 */ >; }; @@ -596,6 +619,12 @@ >; }; + pinctrl_usbc_det: gpio-usbc-det { + fsl,pins = < + MX7D_PAD_ENET1_CRS__GPIO7_IO14 0x14 + >; + }; + pinctrl_usbh_reg: gpio-usbh-vbus { fsl,pins = < MX7D_PAD_UART3_CTS_B__GPIO4_IO7 0x14 /* SODIMM 129 USBH PEN */ diff --git a/arch/arm/boot/dts/imx7s-colibri.dtsi b/arch/arm/boot/dts/imx7s-colibri.dtsi index 1fb1ec5d3d70..6d16e32aed89 100644 --- a/arch/arm/boot/dts/imx7s-colibri.dtsi +++ b/arch/arm/boot/dts/imx7s-colibri.dtsi @@ -49,3 +49,7 @@ reg = <0x80000000 0x10000000>; }; }; + +&gpmi { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 106711d2c01b..e1493fc55cdf 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -448,7 +448,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302d0000 0x10000>; interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT1_ROOT_CLK>, <&clks IMX7D_GPT1_ROOT_CLK>; clock-names = "ipg", "per"; }; @@ -457,7 +457,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302e0000 0x10000>; interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT2_ROOT_CLK>, <&clks IMX7D_GPT2_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -467,7 +467,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302f0000 0x10000>; interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT3_ROOT_CLK>, <&clks IMX7D_GPT3_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -477,7 +477,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x30300000 0x10000>; interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT4_ROOT_CLK>, <&clks IMX7D_GPT4_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index d6b711011cba..efb05cc48521 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -36,10 +36,10 @@ #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { + cpu0: cpu@f00 { compatible = "arm,cortex-a7"; device_type = "cpu"; - reg = <0>; + reg = <0xf00>; }; }; diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 3fdd0a72f87f..506b118e511a 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -192,3 +192,7 @@ &twl_gpio { ti,use-leds; }; + +&twl_keypad { + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 464df4290ffc..cf0c991bb15b 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -728,7 +728,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -737,7 +737,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 40c11b6b217a..4dd8a4f6b393 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -169,8 +169,8 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp-182150000 { - opp-hz = /bits/ 64 <182150000>; + opp-182142857 { + opp-hz = /bits/ 64 <182142857>; opp-microvolt = <1150000>; }; opp-318750000 { @@ -271,7 +271,7 @@ &aobus { pmu: pmu@e0 { compatible = "amlogic,meson8-pmu", "syscon"; - reg = <0xe0 0x8>; + reg = <0xe0 0x18>; }; pinctrl_aobus: pinctrl@84 { diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index ec67f49116d9..c95510001dc1 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -165,8 +165,8 @@ opp-hz = /bits/ 64 <255000000>; opp-microvolt = <1100000>; }; - opp-364300000 { - opp-hz = /bits/ 64 <364300000>; + opp-364285714 { + opp-hz = /bits/ 64 <364285714>; opp-microvolt = <1100000>; }; opp-425000000 { diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index d1eae47b83f6..ab91c4ebb146 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -13,8 +13,10 @@ #interrupt-cells = <2>; #address-cells = <1>; #size-cells = <0>; - spi-max-frequency = <3000000>; + spi-max-frequency = <9600000>; spi-cs-high; + spi-cpol; + spi-cpha; cpcap_adc: adc { compatible = "motorola,mapphone-cpcap-adc"; @@ -160,12 +162,12 @@ regulator-enable-ramp-delay = <1000>; }; - /* Used by DSS */ + /* Used by DSS and is the "zerov_regulator" trigger for SoC off mode */ vcsi: VCSI { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-enable-ramp-delay = <1000>; - regulator-boot-on; + regulator-always-on; }; vdac: VDAC { diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 84a5ade1e865..4227da71cc62 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -105,6 +105,14 @@ linux,code = <SW_FRONT_PROXIMITY>; linux,can-disable; }; + + machine_cover { + label = "Machine Cover"; + gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ + linux,input-type = <EV_SW>; + linux,code = <SW_MACHINE_COVER>; + linux,can-disable; + }; }; isp1707: isp1707 { @@ -155,6 +163,12 @@ pwms = <&pwm9 0 26316 0>; /* 38000 Hz */ }; + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; + /* controlled (enabled/disabled) directly by bcm2048 and wl1251 */ vctcxo: vctcxo { compatible = "fixed-clock"; @@ -808,10 +822,6 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; bus-width = <4>; - /* For debugging, it is often good idea to remove this GPIO. - It means you can remove back cover (to reboot by removing - battery) and still use the MMC card. */ - cd-gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ }; /* most boards use vaux3, only some old versions use vmmc2 instead */ @@ -843,34 +853,46 @@ compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ + /* + * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported + * bootloader set values when booted with v5.1 + * (OneNAND Manufacturer: Samsung): + * + * cs0 GPMC_CS_CONFIG1: 0xfb001202 + * cs0 GPMC_CS_CONFIG2: 0x00111100 + * cs0 GPMC_CS_CONFIG3: 0x00020200 + * cs0 GPMC_CS_CONFIG4: 0x11001102 + * cs0 GPMC_CS_CONFIG5: 0x03101616 + * cs0 GPMC_CS_CONFIG6: 0x90060000 + */ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; gpmc,burst-read; gpmc,burst-wrap; gpmc,burst-write; - gpmc,device-width = <2>; /* GPMC_DEVWIDTH_16BIT */ - gpmc,mux-add-data = <2>; /* GPMC_MUX_AD */ + gpmc,device-width = <2>; + gpmc,mux-add-data = <2>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <102>; + gpmc,cs-wr-off-ns = <102>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <12>; + gpmc,adv-wr-off-ns = <12>; + gpmc,oe-on-ns = <12>; + gpmc,oe-off-ns = <102>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; - gpmc,page-burst-access-ns = <15>; + gpmc,we-off-ns = <102>; + gpmc,rd-cycle-ns = <132>; + gpmc,wr-cycle-ns = <132>; + gpmc,access-ns = <96>; + gpmc,page-burst-access-ns = <18>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; - gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; + gpmc,clk-activation-ns = <6>; + gpmc,wr-data-mux-bus-ns = <36>; + gpmc,wr-access-ns = <96>; gpmc,sync-clk-ps = <15000>; /* diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index ec5891718ae6..150d5be42d27 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -226,6 +226,17 @@ gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* GPIO_164 */ }; + /* wl1251 wifi+bt module */ + wlan_en: fixed-regulator-wg7210_en { + compatible = "regulator-fixed"; + regulator-name = "vwlan"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + startup-delay-us = <50000>; + enable-active-high; + gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>; + }; + /* wg7210 (wifi+bt module) 32k clock buffer */ wg7210_32k: fixed-regulator-wg7210_32k { compatible = "regulator-fixed"; @@ -522,9 +533,30 @@ /*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/ /* GPIO_127 */ }; -/* mmc3 is probed using pdata-quirks to pass wl1251 card data */ &mmc3 { - status = "disabled"; + vmmc-supply = <&wlan_en>; + + bus-width = <4>; + non-removable; + ti,non-removable; + cap-power-off-card; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins>; + + #address-cells = <1>; + #size-cells = <0>; + + wlan: wifi@1 { + compatible = "ti,wl1251"; + + reg = <1>; + + interrupt-parent = <&gpio1>; + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_21 */ + + ti,wl1251-has-eeprom; + }; }; /* bluetooth*/ diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi index a7a04d78deeb..f24e2326cfa7 100644 --- a/arch/arm/boot/dts/omap3-tao3530.dtsi +++ b/arch/arm/boot/dts/omap3-tao3530.dtsi @@ -222,7 +222,7 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; vqmmc-supply = <&vsim>; - cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; bus-width = <8>; }; diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 4454449de00c..a40fe8d49da6 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -369,7 +369,7 @@ compatible = "ti,wl1285", "ti,wl1283"; reg = <2>; /* gpio_100 with gpmc_wait2 pad as wakeirq */ - interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>, + interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>, <&omap4_pmx_core 0x4e>; interrupt-names = "irq", "wakeup"; ref-clock-frequency = <26000000>; diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts index 8047e8cdb3af..4548d87534e3 100644 --- a/arch/arm/boot/dts/omap4-duovero-parlor.dts +++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts @@ -139,7 +139,7 @@ ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio2>; - interrupts = <12 IRQ_TYPE_EDGE_FALLING>; /* gpio_44 */ + interrupts = <12 IRQ_TYPE_LEVEL_LOW>; /* gpio_44 */ phy-mode = "mii"; diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index 14be2ecb62b1..55ea8b6189af 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -474,7 +474,7 @@ compatible = "ti,wl1271"; reg = <2>; /* gpio_53 with gpmc_ncs3 pad as wakeup */ - interrupts-extended = <&gpio2 21 IRQ_TYPE_EDGE_RISING>, + interrupts-extended = <&gpio2 21 IRQ_TYPE_LEVEL_HIGH>, <&omap4_pmx_core 0x3a>; interrupt-names = "irq", "wakeup"; ref-clock-frequency = <38400000>; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 3c274965ff40..91480ac1f328 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -512,7 +512,7 @@ compatible = "ti,wl1281"; reg = <2>; interrupt-parent = <&gpio1>; - interrupts = <21 IRQ_TYPE_EDGE_RISING>; /* gpio 53 */ + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* gpio 53 */ ref-clock-frequency = <26000000>; tcxo-clock-frequency = <26000000>; }; diff --git a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi index 6dbbc9b3229c..d0032213101e 100644 --- a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi +++ b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi @@ -69,7 +69,7 @@ compatible = "ti,wl1271"; reg = <2>; interrupt-parent = <&gpio2>; - interrupts = <9 IRQ_TYPE_EDGE_RISING>; /* gpio 41 */ + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; /* gpio 41 */ ref-clock-frequency = <38400000>; }; }; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index c43e52fd5f65..e9c5ee154786 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -331,8 +331,8 @@ target-module@56000000 { compatible = "ti,sysc-omap4", "ti,sysc"; ti,hwmods = "gpu"; - reg = <0x5601fc00 0x4>, - <0x5601fc10 0x4>; + reg = <0x5600fe00 0x4>, + <0x5600fe10 0x4>; reg-names = "rev", "sysc"; ti,sysc-midle = <SYSC_IDLE_FORCE>, <SYSC_IDLE_NO>, diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7fff555ee394..68ac04641bdb 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -362,7 +362,7 @@ pinctrl-names = "default"; pinctrl-0 = <&wlcore_irq_pin>; interrupt-parent = <&gpio1>; - interrupts = <14 IRQ_TYPE_EDGE_RISING>; /* gpio 14 */ + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */ ref-clock-frequency = <26000000>; }; }; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index edfd26c03462..d6cb07a822f5 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -143,6 +143,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0 0 0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3"; reg = <0 0x44000000 0 0x2000>, <0 0x44800000 0 0x3000>, diff --git a/arch/arm/boot/dts/ox810se.dtsi b/arch/arm/boot/dts/ox810se.dtsi index 9f6c2b660ed3..0755e5864c4a 100644 --- a/arch/arm/boot/dts/ox810se.dtsi +++ b/arch/arm/boot/dts/ox810se.dtsi @@ -323,8 +323,8 @@ interrupt-controller; reg = <0 0x200>; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index c9b327732063..90846a7655b4 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -240,8 +240,8 @@ reg = <0 0x200>; interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index dd865f3c2eda..4447f45f0cba 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -131,7 +131,14 @@ cmt1: timer@e6130000 { compatible = "renesas,r8a73a4-cmt1", "renesas,rcar-gen2-cmt1"; reg = <0 0xe6130000 0 0x1004>; - interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>; clocks = <&mstp3_clks R8A73A4_CLK_CMT1>; clock-names = "fck"; power-domains = <&pd_c5>; diff --git a/arch/arm/boot/dts/r8a7740.dtsi b/arch/arm/boot/dts/r8a7740.dtsi index 12ffe73bf2bc..155f58e6d4e8 100644 --- a/arch/arm/boot/dts/r8a7740.dtsi +++ b/arch/arm/boot/dts/r8a7740.dtsi @@ -479,7 +479,7 @@ cpg_clocks: cpg_clocks@e6150000 { compatible = "renesas,r8a7740-cpg-clocks"; reg = <0xe6150000 0x10000>; - clocks = <&extal1_clk>, <&extalr_clk>; + clocks = <&extal1_clk>, <&extal2_clk>, <&extalr_clk>; #clock-cells = <1>; clock-output-names = "system", "pllc0", "pllc1", "pllc2", "r", diff --git a/arch/arm/boot/dts/r8a7743.dtsi b/arch/arm/boot/dts/r8a7743.dtsi index de981d629bdd..fdd267819319 100644 --- a/arch/arm/boot/dts/r8a7743.dtsi +++ b/arch/arm/boot/dts/r8a7743.dtsi @@ -338,7 +338,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -348,7 +348,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -357,7 +357,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -367,7 +367,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -376,7 +376,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -386,7 +386,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7744.dtsi b/arch/arm/boot/dts/r8a7744.dtsi index fa74a262107b..8264481bf876 100644 --- a/arch/arm/boot/dts/r8a7744.dtsi +++ b/arch/arm/boot/dts/r8a7744.dtsi @@ -338,7 +338,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -348,7 +348,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -357,7 +357,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -367,7 +367,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -376,7 +376,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -386,7 +386,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7745.dtsi b/arch/arm/boot/dts/r8a7745.dtsi index c53f7ff20695..c306713f2ab7 100644 --- a/arch/arm/boot/dts/r8a7745.dtsi +++ b/arch/arm/boot/dts/r8a7745.dtsi @@ -302,7 +302,7 @@ resets = <&cpg 407>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -312,7 +312,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -321,7 +321,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -331,7 +331,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -340,7 +340,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -350,7 +350,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 3ff259207527..c2400e440b93 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -68,6 +68,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = <GIC_PPI 11 + (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 5a2747758f67..e3ba00a22eeb 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -427,7 +427,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -437,7 +437,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -446,7 +446,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -456,7 +456,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -465,7 +465,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -475,7 +475,7 @@ status = "disabled"; }; - ipmmu_rt: mmu@ffc80000 { + ipmmu_rt: iommu@ffc80000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xffc80000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 6f875502453c..a26f86ccc579 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -350,7 +350,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -360,7 +360,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -369,7 +369,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -379,7 +379,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -388,7 +388,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -398,7 +398,7 @@ status = "disabled"; }; - ipmmu_rt: mmu@ffc80000 { + ipmmu_rt: iommu@ffc80000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xffc80000 0 0x1000>; @@ -407,7 +407,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts index f51601af89a2..68da21cbdd8f 100644 --- a/arch/arm/boot/dts/r8a7793-gose.dts +++ b/arch/arm/boot/dts/r8a7793-gose.dts @@ -339,7 +339,7 @@ reg = <0x20>; remote = <&vin1>; - port { + ports { #address-cells = <1>; #size-cells = <0>; @@ -399,7 +399,7 @@ interrupts = <2 IRQ_TYPE_LEVEL_LOW>; default-input = <0>; - port { + ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index bf05110fac4e..fa3839795018 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -336,7 +336,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -346,7 +346,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -355,7 +355,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -365,7 +365,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -374,7 +374,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -384,7 +384,7 @@ status = "disabled"; }; - ipmmu_rt: mmu@ffc80000 { + ipmmu_rt: iommu@ffc80000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xffc80000 0 0x1000>; @@ -393,7 +393,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index 8d797d34816e..9dd952479e68 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -290,7 +290,7 @@ resets = <&cpg 407>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -300,7 +300,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -309,7 +309,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -319,7 +319,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -328,7 +328,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -338,7 +338,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 0290ea4edd32..d03c95abc873 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -128,7 +128,7 @@ assigned-clocks = <&cru SCLK_GPU>; assigned-clock-rates = <100000000>; clocks = <&cru SCLK_GPU>, <&cru SCLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; resets = <&cru SRST_GPU>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3188-bqedison2qc.dts b/arch/arm/boot/dts/rk3188-bqedison2qc.dts index c8b62bbd6a4a..66a0ff196eb1 100644 --- a/arch/arm/boot/dts/rk3188-bqedison2qc.dts +++ b/arch/arm/boot/dts/rk3188-bqedison2qc.dts @@ -58,20 +58,25 @@ lvds-encoder { compatible = "ti,sn75lvds83", "lvds-encoder"; - #address-cells = <1>; - #size-cells = <0>; - port@0 { - reg = <0>; - lvds_in_vop0: endpoint { - remote-endpoint = <&vop0_out_lvds>; + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_in_vop0: endpoint { + remote-endpoint = <&vop0_out_lvds>; + }; }; - }; - port@1 { - reg = <1>; - lvds_out_panel: endpoint { - remote-endpoint = <&panel_in_lvds>; + port@1 { + reg = <1>; + + lvds_out_panel: endpoint { + remote-endpoint = <&panel_in_lvds>; + }; }; }; }; @@ -465,10 +470,13 @@ non-removable; pinctrl-names = "default"; pinctrl-0 = <&sd1_clk>, <&sd1_cmd>, <&sd1_bus4>; - vmmcq-supply = <&vccio_wl>; + vqmmc-supply = <&vccio_wl>; + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio3>; interrupts = <RK_PD2 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts index 5670b33fd1bd..aed879db6c15 100644 --- a/arch/arm/boot/dts/rk3228-evb.dts +++ b/arch/arm/boot/dts/rk3228-evb.dts @@ -46,7 +46,7 @@ #address-cells = <1>; #size-cells = <0>; - phy: phy@0 { + phy: ethernet-phy@0 { compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22"; reg = <0>; clocks = <&cru SCLK_MAC_PHY>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index da102fff96a2..46ab3740d902 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -545,7 +545,7 @@ "pp1", "ppmmu1"; clocks = <&cru ACLK_GPU>, <&cru ACLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; resets = <&cru SRST_GPU_A>; status = "disabled"; }; @@ -950,7 +950,7 @@ }; }; - spi-0 { + spi0 { spi0_clk: spi0-clk { rockchip,pins = <0 RK_PB1 2 &pcfg_pull_up>; }; @@ -968,7 +968,7 @@ }; }; - spi-1 { + spi1 { spi1_clk: spi1-clk { rockchip,pins = <0 RK_PC7 2 &pcfg_pull_up>; }; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 97307a405e60..bce0b05ef7bf 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -84,7 +84,7 @@ compatible = "arm,mali-400"; reg = <0x10090000 0x10000>; clocks = <&cru ACLK_GPU>, <&cru ACLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; assigned-clocks = <&cru ACLK_GPU>; assigned-clock-rates = <100000000>; resets = <&cru SRST_GPU>; diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts index 0e159c884f97..1aeac33b0d34 100644 --- a/arch/arm/boot/dts/s3c6410-mini6410.dts +++ b/arch/arm/boot/dts/s3c6410-mini6410.dts @@ -165,6 +165,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/s3c6410-smdk6410.dts b/arch/arm/boot/dts/s3c6410-smdk6410.dts index a9a5689dc462..3bf6c450a26e 100644 --- a/arch/arm/boot/dts/s3c6410-smdk6410.dts +++ b/arch/arm/boot/dts/s3c6410-smdk6410.dts @@ -69,6 +69,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index 8ff70b856334..d419b77201f7 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi +++ b/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -454,6 +454,7 @@ pinctrl-names = "default"; cap-sd-highspeed; cap-mmc-highspeed; + keep-power-in-suspend; mmc-pwrseq = <&wifi_pwrseq>; non-removable; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 3bbc84bf8dbf..7a7c229a4ed6 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1188,49 +1188,49 @@ usart0_clk: usart0_clk { #clock-cells = <0>; reg = <12>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart1_clk: usart1_clk { #clock-cells = <0>; reg = <13>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart2_clk: usart2_clk { #clock-cells = <0>; reg = <14>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart3_clk: usart3_clk { #clock-cells = <0>; reg = <15>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; twi0_clk: twi0_clk { reg = <18>; #clock-cells = <0>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi1_clk: twi1_clk { #clock-cells = <0>; reg = <19>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi2_clk: twi2_clk { #clock-cells = <0>; reg = <20>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; mci0_clk: mci0_clk { @@ -1246,19 +1246,19 @@ spi0_clk: spi0_clk { #clock-cells = <0>; reg = <24>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; spi1_clk: spi1_clk { #clock-cells = <0>; reg = <25>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; tcb0_clk: tcb0_clk { #clock-cells = <0>; reg = <26>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; pwm_clk: pwm_clk { @@ -1269,7 +1269,7 @@ adc_clk: adc_clk { #clock-cells = <0>; reg = <29>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; dma0_clk: dma0_clk { @@ -1300,13 +1300,13 @@ ssc0_clk: ssc0_clk { #clock-cells = <0>; reg = <38>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; ssc1_clk: ssc1_clk { #clock-cells = <0>; reg = <39>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; sha_clk: sha_clk { diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi index cf06a018ed0f..2470dd3fff25 100644 --- a/arch/arm/boot/dts/sama5d3_can.dtsi +++ b/arch/arm/boot/dts/sama5d3_can.dtsi @@ -36,13 +36,13 @@ can0_clk: can0_clk { #clock-cells = <0>; reg = <40>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; can1_clk: can1_clk { #clock-cells = <0>; reg = <41>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_tcb1.dtsi b/arch/arm/boot/dts/sama5d3_tcb1.dtsi index 1584035daf51..215802b8db30 100644 --- a/arch/arm/boot/dts/sama5d3_tcb1.dtsi +++ b/arch/arm/boot/dts/sama5d3_tcb1.dtsi @@ -22,6 +22,7 @@ tcb1_clk: tcb1_clk { #clock-cells = <0>; reg = <27>; + atmel,clk-output-range = <0 166000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_uart.dtsi b/arch/arm/boot/dts/sama5d3_uart.dtsi index 4316bdbdc25d..cb62adbd28ed 100644 --- a/arch/arm/boot/dts/sama5d3_uart.dtsi +++ b/arch/arm/boot/dts/sama5d3_uart.dtsi @@ -41,13 +41,13 @@ uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart1_clk: uart1_clk { #clock-cells = <0>; reg = <17>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index ec1966480f2f..9a14b5fe3b57 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -709,7 +709,7 @@ }; }; - L2: l2-cache@fffef000 { + L2: cache-controller@fffef000 { compatible = "arm,pl310-cache"; reg = <0xfffef000 0x1000>; interrupts = <0 38 0x04>; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index ae24599d5829..6378e6461bf7 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -633,7 +633,7 @@ reg = <0xffcfb100 0x80>; }; - L2: l2-cache@fffff000 { + L2: cache-controller@fffff000 { compatible = "arm,pl310-cache"; reg = <0xfffff000 0x1000>; interrupts = <0 18 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi index 60e11045ad76..d051f080e52e 100644 --- a/arch/arm/boot/dts/stihxxx-b2120.dtsi +++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi @@ -46,7 +46,7 @@ /* DAC */ format = "i2s"; mclk-fs = <256>; - frame-inversion = <1>; + frame-inversion; cpu { sound-dai = <&sti_uni_player2>; }; diff --git a/arch/arm/boot/dts/stm32f469-disco.dts b/arch/arm/boot/dts/stm32f469-disco.dts index a3ff04940aec..c6dc6d1a051b 100644 --- a/arch/arm/boot/dts/stm32f469-disco.dts +++ b/arch/arm/boot/dts/stm32f469-disco.dts @@ -76,6 +76,13 @@ regulator-max-microvolt = <3300000>; }; + vdd_dsi: vdd-dsi { + compatible = "regulator-fixed"; + regulator-name = "vdd_dsi"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + soc { dma-ranges = <0xc0000000 0x0 0x10000000>; }; @@ -155,6 +162,7 @@ compatible = "orisetech,otm8009a"; reg = <0>; /* dsi virtual channel (0..3) */ reset-gpios = <&gpioh 7 GPIO_ACTIVE_LOW>; + power-supply = <&vdd_dsi>; status = "okay"; port { diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 2afeee65c3ea..e7cf7c63ab15 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -787,7 +787,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; status = "disabled"; }; @@ -800,7 +800,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index e88daa4ef1af..d777ec70a6f0 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -198,7 +198,7 @@ default-pool { compatible = "shared-dma-pool"; size = <0x6000000>; - alloc-ranges = <0x4a000000 0x6000000>; + alloc-ranges = <0x40000000 0x10000000>; reusable; linux,cma-default; }; @@ -520,6 +520,7 @@ interrupts = <39>; clocks = <&ccu CLK_AHB_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -529,6 +530,7 @@ interrupts = <64>; clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -608,6 +610,7 @@ interrupts = <40>; clocks = <&ccu CLK_AHB_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -617,6 +620,7 @@ interrupts = <65>; clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i.dtsi index 2fb438c4fe9d..c0775e391df8 100644 --- a/arch/arm/boot/dts/sun5i.dtsi +++ b/arch/arm/boot/dts/sun5i.dtsi @@ -117,7 +117,7 @@ default-pool { compatible = "shared-dma-pool"; size = <0x6000000>; - alloc-ranges = <0x4a000000 0x6000000>; + alloc-ranges = <0x40000000 0x10000000>; reusable; linux,cma-default; }; @@ -391,6 +391,7 @@ interrupts = <39>; clocks = <&ccu CLK_AHB_EHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -400,6 +401,7 @@ interrupts = <40>; clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index c04efad81bbc..fa983f9ff5f5 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -543,6 +543,7 @@ clocks = <&ccu CLK_AHB1_EHCI0>; resets = <&ccu RST_AHB1_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -553,6 +554,7 @@ clocks = <&ccu CLK_AHB1_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_AHB1_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -563,6 +565,7 @@ clocks = <&ccu CLK_AHB1_EHCI1>; resets = <&ccu RST_AHB1_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -573,6 +576,7 @@ clocks = <&ccu CLK_AHB1_OHCI1>, <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_AHB1_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 9ad8e445b240..259b9e1555b6 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -180,7 +180,7 @@ default-pool { compatible = "shared-dma-pool"; size = <0x6000000>; - alloc-ranges = <0x4a000000 0x6000000>; + alloc-ranges = <0x40000000 0x10000000>; reusable; linux,cma-default; }; @@ -612,6 +612,7 @@ interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -621,6 +622,7 @@ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -703,6 +705,7 @@ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -712,6 +715,7 @@ interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index af2fa694a467..f76aad0c5d4d 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -307,6 +307,7 @@ clocks = <&ccu CLK_BUS_EHCI>; resets = <&ccu RST_BUS_EHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -317,6 +318,7 @@ clocks = <&ccu CLK_BUS_OHCI>, <&ccu CLK_USB_OHCI>; resets = <&ccu RST_BUS_OHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index ea299d3d84d0..bb4667643df7 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -90,7 +90,7 @@ initial-mode = <1>; /* initialize in HUB mode */ disabled-ports = <1>; intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */ - reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */ + reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */ connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */ refclk-frequency = <19200000>; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 66d078053d5f..7ed0fcec004d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -192,6 +192,7 @@ vqmmc-supply = <®_dldo1>; non-removable; wakeup-source; + keep-power-in-suspend; status = "okay"; brcmf: wifi@1 { @@ -357,8 +358,8 @@ }; ®_dldo3 { - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; regulator-name = "vdd-csi"; }; @@ -481,7 +482,8 @@ }; &usbphy { - usb0_id_det-gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */ + usb0_id_det-gpios = <&pio 7 11 (GPIO_ACTIVE_HIGH | GPIO_PULL_UP)>; /* PH11 */ + usb0_vbus_power-supply = <&usb_power_supply>; usb0_vbus-supply = <®_drivevbus>; usb1_vbus-supply = <®_vmain>; usb2_vbus-supply = <®_vmain>; diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index 392b0cabbf0d..10cc220c7034 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -313,11 +313,11 @@ display_clocks: clock@1000000 { compatible = "allwinner,sun8i-a83t-de2-clk"; - reg = <0x01000000 0x100000>; - clocks = <&ccu CLK_PLL_DE>, - <&ccu CLK_BUS_DE>; - clock-names = "mod", - "bus"; + reg = <0x01000000 0x10000>; + clocks = <&ccu CLK_BUS_DE>, + <&ccu CLK_PLL_DE>; + clock-names = "bus", + "mod"; resets = <&ccu RST_BUS_DE>; #clock-cells = <1>; #reset-cells = <1>; @@ -632,6 +632,7 @@ clocks = <&ccu CLK_BUS_EHCI0>; resets = <&ccu RST_BUS_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -643,6 +644,7 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -654,6 +656,7 @@ clocks = <&ccu CLK_BUS_EHCI1>; resets = <&ccu RST_BUS_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts index 78a37a47185a..07784d76dd53 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts @@ -31,7 +31,7 @@ pwr_led { label = "bananapi-m2-zero:red:pwr"; - gpios = <&r_pio 0 10 GPIO_ACTIVE_HIGH>; /* PL10 */ + gpios = <&r_pio 0 10 GPIO_ACTIVE_LOW>; /* PL10 */ default-state = "on"; }; }; diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi index e37c30e811d3..6056f206c9e3 100644 --- a/arch/arm/boot/dts/sun8i-h3.dtsi +++ b/arch/arm/boot/dts/sun8i-h3.dtsi @@ -80,7 +80,7 @@ #cooling-cells = <2>; }; - cpu@1 { + cpu1: cpu@1 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <1>; @@ -90,7 +90,7 @@ #cooling-cells = <2>; }; - cpu@2 { + cpu2: cpu@2 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <2>; @@ -100,7 +100,7 @@ #cooling-cells = <2>; }; - cpu@3 { + cpu3: cpu@3 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <3>; @@ -111,6 +111,15 @@ }; }; + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index bb856e53b806..cf771e99cb89 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -118,11 +118,11 @@ display_clocks: clock@1000000 { compatible = "allwinner,sun8i-r40-de2-clk", "allwinner,sun8i-h3-de2-clk"; - reg = <0x01000000 0x100000>; - clocks = <&ccu CLK_DE>, - <&ccu CLK_BUS_DE>; - clock-names = "mod", - "bus"; + reg = <0x01000000 0x10000>; + clocks = <&ccu CLK_BUS_DE>, + <&ccu CLK_DE>; + clock-names = "bus", + "mod"; resets = <&ccu RST_BUS_DE>; #clock-cells = <1>; #reset-cells = <1>; @@ -266,6 +266,16 @@ #phy-cells = <1>; }; + ahci: sata@1c18000 { + compatible = "allwinner,sun8i-r40-ahci"; + reg = <0x01c18000 0x1000>; + interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>; + resets = <&ccu RST_BUS_SATA>; + reset-names = "ahci"; + status = "disabled"; + }; + ehci1: usb@1c19000 { compatible = "allwinner,sun8i-r40-ehci", "generic-ehci"; reg = <0x01c19000 0x100>; @@ -273,6 +283,7 @@ clocks = <&ccu CLK_BUS_EHCI1>; resets = <&ccu RST_BUS_EHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -284,6 +295,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -294,6 +306,7 @@ clocks = <&ccu CLK_BUS_EHCI2>; resets = <&ccu RST_BUS_EHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -305,6 +318,7 @@ <&ccu CLK_USB_OHCI2>; resets = <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -552,17 +566,6 @@ #size-cells = <0>; }; - ahci: sata@1c18000 { - compatible = "allwinner,sun8i-r40-ahci"; - reg = <0x01c18000 0x1000>; - interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>; - resets = <&ccu RST_BUS_SATA>; - reset-names = "ahci"; - status = "disabled"; - - }; - gmac: ethernet@1c50000 { compatible = "allwinner,sun8i-r40-gmac"; syscon = <&ccu>; diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index df72b1719c34..98d1920a7620 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -103,11 +103,11 @@ display_clocks: clock@1000000 { compatible = "allwinner,sun8i-v3s-de2-clk"; - reg = <0x01000000 0x100000>; - clocks = <&ccu CLK_DE>, - <&ccu CLK_BUS_DE>; - clock-names = "mod", - "bus"; + reg = <0x01000000 0x10000>; + clocks = <&ccu CLK_BUS_DE>, + <&ccu CLK_DE>; + clock-names = "bus", + "mod"; resets = <&ccu RST_BUS_DE>; #clock-cells = <1>; #reset-cells = <1>; diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi index 0c1eec9000e3..c1aa26db44ae 100644 --- a/arch/arm/boot/dts/sun9i-a80.dtsi +++ b/arch/arm/boot/dts/sun9i-a80.dtsi @@ -342,6 +342,7 @@ clocks = <&usb_clocks CLK_BUS_HCI0>; resets = <&usb_clocks RST_USB0_HCI>; phys = <&usbphy1>; + phy-names = "usb"; status = "disabled"; }; @@ -353,6 +354,7 @@ <&usb_clocks CLK_USB_OHCI0>; resets = <&usb_clocks RST_USB0_HCI>; phys = <&usbphy1>; + phy-names = "usb"; status = "disabled"; }; @@ -374,6 +376,7 @@ clocks = <&usb_clocks CLK_BUS_HCI1>; resets = <&usb_clocks RST_USB1_HCI>; phys = <&usbphy2>; + phy-names = "usb"; status = "disabled"; }; @@ -403,6 +406,7 @@ clocks = <&usb_clocks CLK_BUS_HCI2>; resets = <&usb_clocks RST_USB2_HCI>; phys = <&usbphy3>; + phy-names = "usb"; status = "disabled"; }; @@ -414,6 +418,7 @@ <&usb_clocks CLK_USB_OHCI2>; resets = <&usb_clocks RST_USB2_HCI>; phys = <&usbphy3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi index 53edd1faee99..3635b251e250 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi @@ -16,16 +16,28 @@ regulator-type = "voltage"; regulator-boot-on; regulator-always-on; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1300000>; + regulator-min-microvolt = <1108475>; + regulator-max-microvolt = <1308475>; regulator-ramp-delay = <50>; /* 4ms */ gpios = <&r_pio 0 1 GPIO_ACTIVE_HIGH>; /* PL1 */ gpios-states = <0x1>; - states = <1100000 0x0 - 1300000 0x1>; + states = <1108475 0x0 + 1308475 0x1>; }; }; &cpu0 { cpu-supply = <®_vdd_cpux>; }; + +&cpu1 { + cpu-supply = <®_vdd_cpux>; +}; + +&cpu2 { + cpu-supply = <®_vdd_cpux>; +}; + +&cpu3 { + cpu-supply = <®_vdd_cpux>; +}; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 84977d4eb97a..23bebc80164a 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -113,11 +113,11 @@ display_clocks: clock@1000000 { /* compatible is in per SoC .dtsi file */ - reg = <0x01000000 0x100000>; - clocks = <&ccu CLK_DE>, - <&ccu CLK_BUS_DE>; - clock-names = "mod", - "bus"; + reg = <0x01000000 0x10000>; + clocks = <&ccu CLK_BUS_DE>, + <&ccu CLK_DE>; + clock-names = "bus", + "mod"; resets = <&ccu RST_BUS_DE>; #clock-cells = <1>; #reset-cells = <1>; @@ -304,6 +304,7 @@ clocks = <&ccu CLK_BUS_EHCI1>, <&ccu CLK_BUS_OHCI1>; resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -315,6 +316,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -325,6 +327,7 @@ clocks = <&ccu CLK_BUS_EHCI2>, <&ccu CLK_BUS_OHCI2>; resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -336,6 +339,7 @@ <&ccu CLK_USB_OHCI2>; resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -346,6 +350,7 @@ clocks = <&ccu CLK_BUS_EHCI3>, <&ccu CLK_BUS_OHCI3>; resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>; phys = <&usbphy 3>; + phy-names = "usb"; status = "disabled"; }; @@ -357,6 +362,7 @@ <&ccu CLK_USB_OHCI3>; resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>; phys = <&usbphy 3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index d3963e9eaf48..1aa543eeb96f 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -30,20 +30,20 @@ #interrupt-cells = <1>; ranges; - flash@0,00000000 { + flash@0 { compatible = "arm,vexpress-flash", "cfi-flash"; reg = <0 0x00000000 0x04000000>, <4 0x00000000 0x04000000>; bank-width = <4>; }; - psram@1,00000000 { + psram@100000000 { compatible = "arm,vexpress-psram", "mtd-ram"; reg = <1 0x00000000 0x02000000>; bank-width = <4>; }; - ethernet@2,02000000 { + ethernet@202000000 { compatible = "smsc,lan9118", "smsc,lan9115"; reg = <2 0x02000000 0x10000>; interrupts = <15>; @@ -55,14 +55,14 @@ vddvario-supply = <&v2m_fixed_3v3>; }; - usb@2,03000000 { + usb@203000000 { compatible = "nxp,usb-isp1761"; reg = <2 0x03000000 0x20000>; interrupts = <16>; port1-otg; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts index d7019e89f588..8136e0ca10d5 100644 --- a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts +++ b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts @@ -600,6 +600,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x70>; + i2c-mux-idle-disconnect; sff0_i2c: i2c@1 { #address-cells = <1>; @@ -638,6 +639,7 @@ reg = <0x71>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; sff5_i2c: i2c@1 { #address-cells = <1>; diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index 407ffb7655a8..229561ecee06 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -137,6 +137,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=6 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_ASPEED_VUART=y CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_ASPEED_KCS_IPMI_BMC=y CONFIG_ASPEED_BT_IPMI_BMC=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 64eb896907bf..ab046eb41c7a 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -364,6 +364,7 @@ CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m CONFIG_DRM_TILCDC=m CONFIG_DRM_PANEL_SIMPLE=m +CONFIG_DRM_TI_TFP410=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c index ae5aefc44a4d..ffa8d73fe722 100644 --- a/arch/arm/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 99929122dad7..3546d294d55f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -18,11 +18,11 @@ #endif #include <asm/ptrace.h> -#include <asm/domain.h> #include <asm/opcodes-virt.h> #include <asm/asm-offsets.h> #include <asm/page.h> #include <asm/thread_info.h> +#include <asm/uaccess-asm.h> #define IOMEM(x) (x) @@ -446,79 +446,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .size \name , . - \name .endm - .macro csdb -#ifdef CONFIG_THUMB2_KERNEL - .inst.w 0xf3af8014 -#else - .inst 0xe320f014 -#endif - .endm - - .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req -#ifndef CONFIG_CPU_USE_DOMAINS - adds \tmp, \addr, #\size - 1 - sbcscc \tmp, \tmp, \limit - bcs \bad -#ifdef CONFIG_CPU_SPECTRE - movcs \addr, #0 - csdb -#endif -#endif - .endm - - .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req -#ifdef CONFIG_CPU_SPECTRE - sub \tmp, \limit, #1 - subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr - addhs \tmp, \tmp, #1 @ if (tmp >= 0) { - subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } - movlo \addr, #0 @ if (tmp < 0) addr = NULL - csdb -#endif - .endm - - .macro uaccess_disable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Whenever we re-enter userspace, the domains should always be - * set appropriately. - */ - mov \tmp, #DACR_UACCESS_DISABLE - mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register - .if \isb - instr_sync - .endif -#endif - .endm - - .macro uaccess_enable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Whenever we re-enter userspace, the domains should always be - * set appropriately. - */ - mov \tmp, #DACR_UACCESS_ENABLE - mcr p15, 0, \tmp, c3, c0, 0 - .if \isb - instr_sync - .endif -#endif - .endm - - .macro uaccess_save, tmp -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - mrc p15, 0, \tmp, c3, c0, 0 - str \tmp, [sp, #SVC_DACR] -#endif - .endm - - .macro uaccess_restore -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - ldr r0, [sp, #SVC_DACR] - mcr p15, 0, r0, c3, c0, 0 -#endif - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 567dbede4785..f1d0a7807cd0 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -82,7 +82,7 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_CPU_CP15_MMU -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { unsigned int domain; @@ -94,7 +94,7 @@ static inline unsigned int get_domain(void) return domain; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" @@ -102,12 +102,12 @@ static inline void set_domain(unsigned val) isb(); } #else -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { return 0; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { } #endif diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 83c391b597d4..fdc4ae3e7378 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -164,8 +164,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) preempt_enable(); #endif - if (!ret) - *oval = oldval; + /* + * Store unconditionally. If ret != 0 the extra store is the least + * of the worries but GCC cannot figure out that __futex_atomic_op() + * is either setting ret to -EFAULT or storing the old value in + * oldval which results in a uninitialized warning at the call site. + */ + *oval = oldval; return ret; } diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 6b7644a383f6..468e5b5e8be6 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -14,13 +14,25 @@ #include <asm/cputype.h> /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); @@ -41,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { @@ -177,6 +194,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 7c0eddb0adb2..32fbf82e3ebc 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -14,6 +14,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h index f44f448537f2..1a3eedbac4a2 100644 --- a/arch/arm/include/asm/percpu.h +++ b/arch/arm/include/asm/percpu.h @@ -5,6 +5,8 @@ #ifndef _ASM_ARM_PERCPU_H_ #define _ASM_ARM_PERCPU_H_ +#include <asm/thread_info.h> + /* * Same as asm-generic/percpu.h, except that we store the per cpu offset * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h new file mode 100644 index 000000000000..907571fd05c6 --- /dev/null +++ b/arch/arm/include/asm/uaccess-asm.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_UACCESS_ASM_H__ +#define __ASM_UACCESS_ASM_H__ + +#include <asm/asm-offsets.h> +#include <asm/domain.h> +#include <asm/memory.h> +#include <asm/thread_info.h> + + .macro csdb +#ifdef CONFIG_THUMB2_KERNEL + .inst.w 0xf3af8014 +#else + .inst 0xe320f014 +#endif + .endm + + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req +#ifndef CONFIG_CPU_USE_DOMAINS + adds \tmp, \addr, #\size - 1 + sbcscc \tmp, \tmp, \limit + bcs \bad +#ifdef CONFIG_CPU_SPECTRE + movcs \addr, #0 + csdb +#endif +#endif + .endm + + .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req +#ifdef CONFIG_CPU_SPECTRE + sub \tmp, \limit, #1 + subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr + addhs \tmp, \tmp, #1 @ if (tmp >= 0) { + subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } + movlo \addr, #0 @ if (tmp < 0) addr = NULL + csdb +#endif + .endm + + .macro uaccess_disable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register + .if \isb + instr_sync + .endif +#endif + .endm + + .macro uaccess_enable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_ENABLE + mcr p15, 0, \tmp, c3, c0, 0 + .if \isb + instr_sync + .endif +#endif + .endm + +#if defined(CONFIG_CPU_SW_DOMAIN_PAN) || defined(CONFIG_CPU_USE_DOMAINS) +#define DACR(x...) x +#else +#define DACR(x...) +#endif + + /* + * Save the address limit on entry to a privileged exception. + * + * If we are using the DACR for kernel access by the user accessors + * (CONFIG_CPU_USE_DOMAINS=y), always reset the DACR kernel domain + * back to client mode, whether or not \disable is set. + * + * If we are using SW PAN, set the DACR user domain to no access + * if \disable is set. + */ + .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable + ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] + mov \tmp2, #TASK_SIZE + str \tmp2, [\tsk, #TI_ADDR_LIMIT] + DACR( mrc p15, 0, \tmp0, c3, c0, 0) + DACR( str \tmp0, [sp, #SVC_DACR]) + str \tmp1, [sp, #SVC_ADDR_LIMIT] + .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN) + /* kernel=client, user=no access */ + mov \tmp2, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp2, c3, c0, 0 + instr_sync + .elseif IS_ENABLED(CONFIG_CPU_USE_DOMAINS) + /* kernel=client */ + bic \tmp2, \tmp0, #domain_mask(DOMAIN_KERNEL) + orr \tmp2, \tmp2, #domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) + mcr p15, 0, \tmp2, c3, c0, 0 + instr_sync + .endif + .endm + + /* Restore the user access state previously saved by uaccess_entry */ + .macro uaccess_exit, tsk, tmp0, tmp1 + ldr \tmp1, [sp, #SVC_ADDR_LIMIT] + DACR( ldr \tmp0, [sp, #SVC_DACR]) + str \tmp1, [\tsk, #TI_ADDR_LIMIT] + DACR( mcr p15, 0, \tmp0, c3, c0, 0) + .endm + +#undef DACR + +#endif /* __ASM_UACCESS_ASM_H__ */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 303248e5b990..98c6b91be4a8 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -22,7 +22,7 @@ * perform such accesses (eg, via list poison values) which could then * be exploited for priviledge escalation. */ -static inline unsigned int uaccess_save_and_enable(void) +static __always_inline unsigned int uaccess_save_and_enable(void) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN unsigned int old_domain = get_domain(); @@ -37,7 +37,7 @@ static inline unsigned int uaccess_save_and_enable(void) #endif } -static inline void uaccess_restore(unsigned int flags) +static __always_inline void uaccess_restore(unsigned int flags) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* Restore the user access mask */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8cad59465af3..8b679e2ca3c3 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,10 +17,14 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o reboot.o return_address.o \ + process.o ptrace.o reboot.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +ifneq ($(CONFIG_ARM_UNWIND),y) +obj-$(CONFIG_FRAME_POINTER) += return_address.o +endif + obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 0b8cfdd60b90..9780b690c3ed 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -27,6 +27,7 @@ #include <asm/unistd.h> #include <asm/tls.h> #include <asm/system_info.h> +#include <asm/uaccess-asm.h> #include "entry-header.S" #include <asm/entry-macro-multi.S> @@ -179,15 +180,7 @@ ENDPROC(__und_invalid) stmia r7, {r2 - r6} get_thread_info tsk - ldr r0, [tsk, #TI_ADDR_LIMIT] - mov r1, #TASK_SIZE - str r1, [tsk, #TI_ADDR_LIMIT] - str r0, [sp, #SVC_ADDR_LIMIT] - - uaccess_save r0 - .if \uaccess - uaccess_disable r0 - .endif + uaccess_entry tsk, r0, r1, r2, \uaccess .if \trace #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 32051ec5b33f..40db0f9188b6 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -6,6 +6,7 @@ #include <asm/asm-offsets.h> #include <asm/errno.h> #include <asm/thread_info.h> +#include <asm/uaccess-asm.h> #include <asm/v7m.h> @ Bad Abort numbers @@ -217,9 +218,7 @@ blne trace_hardirqs_off #endif .endif - ldr r1, [sp, #SVC_ADDR_LIMIT] - uaccess_restore - str r1, [tsk, #TI_ADDR_LIMIT] + uaccess_exit tsk, r0, r1 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -263,9 +262,7 @@ @ on the stack remains correct). @ .macro svc_exit_via_fiq - ldr r1, [sp, #SVC_ADDR_LIMIT] - uaccess_restore - str r1, [tsk, #TI_ADDR_LIMIT] + uaccess_exit tsk, r0, r1 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index a7810be07da1..4a3982812a40 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -68,7 +68,7 @@ ENDPROC(__vet_atags) * The following fragment of code is executed with the MMU on in MMU mode, * and uses absolute addresses; this is not position independent. * - * r0 = cp#15 control register + * r0 = cp#15 control register (exc_ret for M-class) * r1 = machine ID * r2 = atags/dtb pointer * r9 = processor ID @@ -137,7 +137,8 @@ __mmap_switched_data: #ifdef CONFIG_CPU_CP15 .long cr_alignment @ r3 #else - .long 0 @ r3 +M_CLASS(.long exc_ret) @ r3 +AR_CLASS(.long 0) @ r3 #endif .size __mmap_switched_data, . - __mmap_switched_data diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index afa350f44dea..0fc814bbc34b 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -201,6 +201,8 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) bic r0, r0, #V7M_SCB_CCR_IC #endif str r0, [r12, V7M_SCB_CCR] + /* Pass exc_ret to __mmap_switched */ + mov r0, r10 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ ret lr ENDPROC(__after_proc_init) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index af8b8e15f589..35cec1c78485 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -677,6 +677,12 @@ static void disable_single_step(struct perf_event *bp) arch_install_hw_breakpoint(bp); } +static int watchpoint_fault_on_uaccess(struct pt_regs *regs, + struct arch_hw_breakpoint *info) +{ + return !user_mode(regs) && info->ctrl.privilege == ARM_BREAKPOINT_USER; +} + static void watchpoint_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -736,16 +742,27 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, } pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + + /* + * If we triggered a user watchpoint from a uaccess routine, + * then handle the stepping ourselves since userspace really + * can't help us with this. + */ + if (watchpoint_fault_on_uaccess(regs, info)) + goto step; + perf_bp_event(wp, regs); /* - * If no overflow handler is present, insert a temporary - * mismatch breakpoint so we can single-step over the - * watchpoint trigger. + * Defer stepping to the overflow handler if one is installed. + * Otherwise, insert a temporary mismatch breakpoint so that + * we can single-step over the watchpoint trigger. */ - if (is_default_overflow_handler(wp)) - enable_single_step(wp, instruction_pointer(regs)); + if (!is_default_overflow_handler(wp)) + goto unlock; +step: + enable_single_step(wp, instruction_pointer(regs)); unlock: rcu_read_unlock(); } diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index ae5020302de4..6607fa817bba 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -146,10 +146,9 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - lsr r7, #16 - and r7, #0xf - cmp r7, #1 - bne 1f + ubfx r7, r7, #16, #4 + teq r7, #0 + beq 1f mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index afcb4d3b14dc..d8ccdd5d09a3 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -219,8 +219,8 @@ static struct undef_hook arm_break_hook = { }; static struct undef_hook thumb_break_hook = { - .instr_mask = 0xffff, - .instr_val = 0xde01, + .instr_mask = 0xffffffff, + .instr_val = 0x0000de01, .cpsr_mask = PSR_T_BIT, .cpsr_val = PSR_T_BIT, .fn = break_trap, diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index b0d2f1fe891d..7b42ac010fdf 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -7,8 +7,6 @@ */ #include <linux/export.h> #include <linux/ftrace.h> - -#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) #include <linux/sched.h> #include <asm/stacktrace.h> @@ -53,6 +51,4 @@ void *return_address(unsigned int level) return NULL; } -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 71778bb0475b..a082f6e4f0f4 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -22,6 +22,19 @@ * A simple function epilogue looks like this: * ldm sp, {fp, sp, pc} * + * When compiled with clang, pc and sp are not pushed. A simple function + * prologue looks like this when built with clang: + * + * stmdb {..., fp, lr} + * add fp, sp, #x + * sub sp, sp, #y + * + * A simple function epilogue looks like this when built with clang: + * + * sub sp, fp, #x + * ldm {..., fp, pc} + * + * * Note that with framepointer enabled, even the leaf functions have the same * prologue and epilogue, therefore we can ignore the LR value in this case. */ @@ -34,6 +47,16 @@ int notrace unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); +#ifdef CONFIG_CC_IS_CLANG + /* check current frame pointer is within bounds */ + if (fp < low + 4 || fp > high - 4) + return -EINVAL; + + frame->sp = frame->fp; + frame->fp = *(unsigned long *)(fp); + frame->pc = frame->lr; + frame->lr = *(unsigned long *)(fp + 4); +#else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; @@ -42,6 +65,7 @@ int notrace unwind_frame(struct stackframe *frame) frame->fp = *(unsigned long *)(fp - 12); frame->sp = *(unsigned long *)(fp - 8); frame->pc = *(unsigned long *)(fp - 4); +#endif return 0; } diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 8872acf9ff99..15f21694b532 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -93,6 +93,8 @@ static bool __init cntvct_functional(void) */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); + if (!np) goto out_put; if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 95b2e1ce559c..f8016e3db65d 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -118,7 +118,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index da85e64143e9..21591fff6b0e 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -591,13 +591,13 @@ static void __init at91_pm_sram_init(void) sram_pool = gen_pool_get(&pdev->dev, NULL); if (!sram_pool) { pr_warn("%s: sram pool unavailable!\n", __func__); - return; + goto out_put_device; } sram_base = gen_pool_alloc(sram_pool, at91_pm_suspend_in_sram_sz); if (!sram_base) { pr_warn("%s: unable to alloc sram!\n", __func__); - return; + goto out_put_device; } sram_pbase = gen_pool_virt_to_phys(sram_pool, sram_base); @@ -605,12 +605,17 @@ static void __init at91_pm_sram_init(void) at91_pm_suspend_in_sram_sz, false); if (!at91_suspend_sram_fn) { pr_warn("SRAM: Could not map\n"); - return; + goto out_put_device; } /* Copy the pm suspend handler to SRAM */ at91_suspend_sram_fn = fncpy(at91_suspend_sram_fn, &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz); + return; + +out_put_device: + put_device(&pdev->dev); + return; } static bool __init at91_is_pm_mode_active(int pm_mode) @@ -690,6 +695,12 @@ static void __init at91_pm_use_default_mode(int pm_mode) soc_pm.data.suspend_mode = AT91_PM_ULP0; } +static const struct of_device_id atmel_shdwc_ids[] = { + { .compatible = "atmel,sama5d2-shdwc" }, + { .compatible = "microchip,sam9x60-shdwc" }, + { /* sentinel. */ } +}; + static void __init at91_pm_modes_init(void) { struct device_node *np; @@ -699,7 +710,7 @@ static void __init at91_pm_modes_init(void) !at91_is_pm_mode_active(AT91_PM_ULP1)) return; - np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-shdwc"); + np = of_find_matching_node(NULL, atmel_shdwc_ids); if (!np) { pr_warn("%s: failed to find shdwc!\n", __func__); goto ulp1_default; @@ -750,6 +761,7 @@ static const struct of_device_id atmel_pmc_ids[] __initconst = { { .compatible = "atmel,sama5d3-pmc", .data = &pmc_infos[1] }, { .compatible = "atmel,sama5d4-pmc", .data = &pmc_infos[1] }, { .compatible = "atmel,sama5d2-pmc", .data = &pmc_infos[1] }, + { .compatible = "microchip,sam9x60-pmc", .data = &pmc_infos[1] }, { /* sentinel */ }, }; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index ed57c879d4e1..2591cba61937 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -268,6 +268,10 @@ ENDPROC(at91_backup_mode) orr tmp1, tmp1, #AT91_PMC_KEY str tmp1, [pmc, #AT91_CKGR_MOR] + /* Quirk for SAM9X60's PMC */ + nop + nop + wait_mckrdy /* Enable the crystal oscillator */ diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 2f9ae6431bf5..cebab6af31a2 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -462,8 +462,8 @@ static s8 dm365_queue_priority_mapping[][2] = { }; static const struct dma_slave_map dm365_edma_map[] = { - { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) }, - { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) }, { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) }, { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) }, { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) }, diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 9a681b421ae1..cd861c57d5ad 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -26,6 +26,7 @@ #define EXYNOS5420_USE_L2_COMMON_UP_STATE BIT(30) static void __iomem *ns_sram_base_addr __ro_after_init; +static bool secure_firmware __ro_after_init; /* * The common v7_exit_coherency_flush API could not be used because of the @@ -58,15 +59,16 @@ static void __iomem *ns_sram_base_addr __ro_after_init; static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) { unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER); + bool state; pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER || cluster >= EXYNOS5420_NR_CLUSTERS) return -EINVAL; - if (!exynos_cpu_power_state(cpunr)) { - exynos_cpu_power_up(cpunr); - + state = exynos_cpu_power_state(cpunr); + exynos_cpu_power_up(cpunr); + if (!state && secure_firmware) { /* * This assumes the cluster number of the big cores(Cortex A15) * is 0 and the Little cores(Cortex A7) is 1. @@ -258,6 +260,8 @@ static int __init exynos_mcpm_init(void) return -ENOMEM; } + secure_firmware = exynos_secure_firmware_available(); + /* * To increase the stability of KFC reset we need to program * the PMU SPARE3 register diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 593bf1519608..95584ee02b55 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -520,6 +520,7 @@ config SOC_IMX6UL bool "i.MX6 UltraLite support" select PINCTRL_IMX6UL select SOC_IMX6 + select ARM_ERRATA_814220 help This enables support for Freescale i.MX6 UltraLite processor. @@ -556,6 +557,7 @@ config SOC_IMX7D select PINCTRL_IMX7D select SOC_IMX7D_CA7 if ARCH_MULTI_V7 select SOC_IMX7D_CM4 if ARM_SINGLE_ARMV7M + select ARM_ERRATA_814220 if ARCH_MULTI_V7 help This enables support for Freescale i.MX7 Dual processor. diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 35ff620537e6..e7364e6c8c6b 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -91,6 +91,10 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +ifeq ($(CONFIG_ARM_CPU_SUSPEND),y) +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o +endif obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index 912aeceb4ff8..5aa5796cff0e 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -109,17 +109,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 2a685adec1df..ae84c08e11fa 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -289,6 +289,6 @@ struct platform_device *__init imx_add_spi_imx( const struct spi_imx_master *pdata); struct platform_device *imx_add_imx_dma(char *name, resource_size_t iobase, - int irq, int irq_err); + int irq); struct platform_device *imx_add_imx_sdma(char *name, resource_size_t iobase, int irq, struct sdma_platform_data *pdata); diff --git a/arch/arm/mach-imx/devices/platform-gpio-mxc.c b/arch/arm/mach-imx/devices/platform-gpio-mxc.c index 78628ef12672..355de845224c 100644 --- a/arch/arm/mach-imx/devices/platform-gpio-mxc.c +++ b/arch/arm/mach-imx/devices/platform-gpio-mxc.c @@ -24,7 +24,8 @@ struct platform_device *__init mxc_register_gpio(char *name, int id, .flags = IORESOURCE_IRQ, }, }; + unsigned int nres; - return platform_device_register_resndata(&mxc_aips_bus, - name, id, res, ARRAY_SIZE(res), NULL, 0); + nres = irq_high ? ARRAY_SIZE(res) : ARRAY_SIZE(res) - 1; + return platform_device_register_resndata(&mxc_aips_bus, name, id, res, nres, NULL, 0); } diff --git a/arch/arm/mach-imx/devices/platform-imx-dma.c b/arch/arm/mach-imx/devices/platform-imx-dma.c index 26b47b36257b..12656f24ad0d 100644 --- a/arch/arm/mach-imx/devices/platform-imx-dma.c +++ b/arch/arm/mach-imx/devices/platform-imx-dma.c @@ -6,7 +6,7 @@ #include "devices-common.h" struct platform_device __init __maybe_unused *imx_add_imx_dma(char *name, - resource_size_t iobase, int irq, int irq_err) + resource_size_t iobase, int irq) { struct resource res[] = { { @@ -17,10 +17,6 @@ struct platform_device __init __maybe_unused *imx_add_imx_dma(char *name, .start = irq, .end = irq, .flags = IORESOURCE_IRQ, - }, { - .start = irq_err, - .end = irq_err, - .flags = IORESOURCE_IRQ, }, }; diff --git a/arch/arm/mach-imx/mm-imx21.c b/arch/arm/mach-imx/mm-imx21.c index e117d2883df9..0b4db806d905 100644 --- a/arch/arm/mach-imx/mm-imx21.c +++ b/arch/arm/mach-imx/mm-imx21.c @@ -79,8 +79,7 @@ void __init imx21_soc_init(void) mxc_register_gpio("imx21-gpio", 5, MX21_GPIO6_BASE_ADDR, SZ_256, MX21_INT_GPIO, 0); pinctrl_provide_dummies(); - imx_add_imx_dma("imx21-dma", MX21_DMA_BASE_ADDR, - MX21_INT_DMACH0, 0); /* No ERR irq */ + imx_add_imx_dma("imx21-dma", MX21_DMA_BASE_ADDR, MX21_INT_DMACH0); platform_device_register_simple("imx21-audmux", 0, imx21_audmux_res, ARRAY_SIZE(imx21_audmux_res)); } diff --git a/arch/arm/mach-imx/mm-imx27.c b/arch/arm/mach-imx/mm-imx27.c index dcbe7ec6d543..9da34d50023f 100644 --- a/arch/arm/mach-imx/mm-imx27.c +++ b/arch/arm/mach-imx/mm-imx27.c @@ -80,8 +80,7 @@ void __init imx27_soc_init(void) mxc_register_gpio("imx21-gpio", 5, MX27_GPIO6_BASE_ADDR, SZ_256, MX27_INT_GPIO, 0); pinctrl_provide_dummies(); - imx_add_imx_dma("imx27-dma", MX27_DMA_BASE_ADDR, - MX27_INT_DMACH0, 0); /* No ERR irq */ + imx_add_imx_dma("imx27-dma", MX27_DMA_BASE_ADDR, MX27_INT_DMACH0); /* imx27 has the imx21 type audmux */ platform_device_register_simple("imx21-audmux", 0, imx27_audmux_res, ARRAY_SIZE(imx27_audmux_res)); diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c index f057df813f83..e9962b48e30c 100644 --- a/arch/arm/mach-imx/pm-imx5.c +++ b/arch/arm/mach-imx/pm-imx5.c @@ -295,14 +295,14 @@ static int __init imx_suspend_alloc_ocram( if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, size); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } phys = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -312,6 +312,8 @@ static int __init imx_suspend_alloc_ocram( if (virt_out) *virt_out = virt; +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 1c0ecad3620e..baf3b47601af 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -493,14 +493,14 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, MX6Q_SUSPEND_OCRAM_SIZE); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -523,7 +523,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) ret = imx6_pm_get_base(&pm_info->mmdc_base, socdata->mmdc_compat); if (ret) { pr_warn("%s: failed to get mmdc base %d!\n", __func__, ret); - goto put_node; + goto put_device; } ret = imx6_pm_get_base(&pm_info->src_base, socdata->src_compat); @@ -570,7 +570,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) &imx6_suspend, MX6Q_SUSPEND_OCRAM_SIZE - sizeof(*pm_info)); - goto put_node; + goto put_device; pl310_cache_map_failed: iounmap(pm_info->gpc_base.vbase); @@ -580,6 +580,8 @@ iomuxc_map_failed: iounmap(pm_info->src_base.vbase); src_map_failed: iounmap(pm_info->mmdc_base.vbase); +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 000000000000..5bd1ba7ef15b --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hardware/cache-l2x0.h> +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 062391ff13da..1eabf2d2834b 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -327,17 +327,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index 982eabc36163..2406cab73835 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -4,6 +4,8 @@ menuconfig ARCH_INTEGRATOR depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V6 select ARM_AMBA select COMMON_CLK_VERSATILE + select CMA + select DMA_CMA select HAVE_TCM select ICST select MFD_SYSCON @@ -35,14 +37,13 @@ config INTEGRATOR_IMPD1 select ARM_VIC select GPIO_PL061 select GPIOLIB + select REGULATOR + select REGULATOR_FIXED_VOLTAGE help The IM-PD1 is an add-on logic module for the Integrator which allows ARM(R) Ltd PrimeCells to be developed and evaluated. The IM-PD1 can be found on the Integrator/PP2 platform. - To compile this driver as a module, choose M here: the - module will be called impd1. - config INTEGRATOR_CM7TDMI bool "Integrator/CM7TDMI core module" depends on ARCH_INTEGRATOR_AP diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig index 880bc2a5cada..7f7002dc2b21 100644 --- a/arch/arm/mach-npcm/Kconfig +++ b/arch/arm/mach-npcm/Kconfig @@ -11,7 +11,7 @@ config ARCH_NPCM7XX depends on ARCH_MULTI_V7 select PINCTRL_NPCM7XX select NPCM7XX_TIMER - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select CACHE_L2X0 select ARM_GIC select HAVE_ARM_TWD if SMP diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 439e143cad7b..46012ca812f4 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -265,6 +265,7 @@ static int __init omapdss_init_of(void) r = of_platform_populate(node, NULL, NULL, &pdev->dev); if (r) { pr_err("Unable to populate DSS submodule devices\n"); + put_device(&pdev->dev); return r; } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index e0350476feaa..500e694669e7 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3565,7 +3565,7 @@ static const struct omap_hwmod_reset dra7_reset_quirks[] = { }; static const struct omap_hwmod_reset omap_reset_quirks[] = { - { .match = "dss", .len = 3, .reset = omap_dss_reset, }, + { .match = "dss_core", .len = 8, .reset = omap_dss_reset, }, { .match = "hdq1w", .len = 5, .reset = omap_hdq1w_reset, }, { .match = "i2c", .len = 3, .reset = omap_i2c_reset, }, { .match = "wd_timer", .len = 8, .reset = omap2_wd_timer_reset, }, diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index b0f8c9a70c68..f53c4a331856 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -7,7 +7,6 @@ #include <linux/clk.h> #include <linux/davinci_emac.h> #include <linux/gpio.h> -#include <linux/gpio/machine.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/of_platform.h> @@ -261,14 +260,6 @@ static void __init am3517_evm_legacy_init(void) am35xx_emac_reset(); } -static struct platform_device omap3_rom_rng_device = { - .name = "omap3-rom-rng", - .id = -1, - .dev = { - .platform_data = rx51_secure_rng_call, - }, -}; - static void __init nokia_n900_legacy_init(void) { hsmmc2_internal_input_clk(); @@ -284,9 +275,6 @@ static void __init nokia_n900_legacy_init(void) pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n"); pr_warn("Thumb binaries may crash randomly without this workaround\n"); } - - pr_info("RX-51: Registering OMAP3 HWRNG device\n"); - platform_device_register(&omap3_rom_rng_device); } } @@ -303,118 +291,15 @@ static void __init omap3_logicpd_torpedo_init(void) } /* omap3pandora legacy devices */ -#define PANDORA_WIFI_IRQ_GPIO 21 -#define PANDORA_WIFI_NRESET_GPIO 23 static struct platform_device pandora_backlight = { .name = "pandora-backlight", .id = -1, }; -static struct regulator_consumer_supply pandora_vmmc3_supply[] = { - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"), -}; - -static struct regulator_init_data pandora_vmmc3 = { - .constraints = { - .valid_ops_mask = REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = ARRAY_SIZE(pandora_vmmc3_supply), - .consumer_supplies = pandora_vmmc3_supply, -}; - -static struct fixed_voltage_config pandora_vwlan = { - .supply_name = "vwlan", - .microvolts = 1800000, /* 1.8V */ - .startup_delay = 50000, /* 50ms */ - .init_data = &pandora_vmmc3, -}; - -static struct platform_device pandora_vwlan_device = { - .name = "reg-fixed-voltage", - .id = 1, - .dev = { - .platform_data = &pandora_vwlan, - }, -}; - -static struct gpiod_lookup_table pandora_vwlan_gpiod_table = { - .dev_id = "reg-fixed-voltage.1", - .table = { - /* - * As this is a low GPIO number it should be at the first - * GPIO bank. - */ - GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO, - NULL, GPIO_ACTIVE_HIGH), - { }, - }, -}; - -static void pandora_wl1251_init_card(struct mmc_card *card) -{ - /* - * We have TI wl1251 attached to MMC3. Pass this information to - * SDIO core because it can't be probed by normal methods. - */ - if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) { - card->quirks |= MMC_QUIRK_NONSTD_SDIO; - card->cccr.wide_bus = 1; - card->cis.vendor = 0x104c; - card->cis.device = 0x9066; - card->cis.blksize = 512; - card->cis.max_dtr = 24000000; - card->ocr = 0x80; - } -} - -static struct omap2_hsmmc_info pandora_mmc3[] = { - { - .mmc = 3, - .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, - .init_card = pandora_wl1251_init_card, - }, - {} /* Terminator */ -}; - -static void __init pandora_wl1251_init(void) -{ - struct wl1251_platform_data pandora_wl1251_pdata; - int ret; - - memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); - - pandora_wl1251_pdata.power_gpio = -1; - - ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); - if (ret < 0) - goto fail; - - pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO); - if (pandora_wl1251_pdata.irq < 0) - goto fail_irq; - - pandora_wl1251_pdata.use_eeprom = true; - ret = wl1251_set_platform_data(&pandora_wl1251_pdata); - if (ret < 0) - goto fail_irq; - - return; - -fail_irq: - gpio_free(PANDORA_WIFI_IRQ_GPIO); -fail: - pr_err("wl1251 board initialisation failed\n"); -} - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - gpiod_add_lookup_table(&pandora_vwlan_gpiod_table); - platform_device_register(&pandora_vwlan_device); - omap_hsmmc_init(pandora_mmc3); - omap_hsmmc_late_init(pandora_mmc3); - pandora_wl1251_init(); } #endif /* CONFIG_ARCH_OMAP3 */ @@ -567,6 +452,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0", &am35xx_emac_pdata), + OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call), /* McBSP modules with sidetone core */ #if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP) OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata), diff --git a/arch/arm/mach-socfpga/pm.c b/arch/arm/mach-socfpga/pm.c index 6ed887cf8dc9..365c0428b21b 100644 --- a/arch/arm/mach-socfpga/pm.c +++ b/arch/arm/mach-socfpga/pm.c @@ -49,14 +49,14 @@ static int socfpga_setup_ocram_self_refresh(void) if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, socfpga_sdram_self_refresh_sz); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -67,7 +67,7 @@ static int socfpga_setup_ocram_self_refresh(void) if (!suspend_ocram_base) { pr_warn("%s: __arm_ioremap_exec failed!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } /* Copy the code that puts DDR in self refresh to ocram */ @@ -81,6 +81,8 @@ static int socfpga_setup_ocram_self_refresh(void) if (!socfpga_sdram_self_refresh_in_ocram) ret = -EFAULT; +put_device: + put_device(&pdev->dev); put_node: of_node_put(np); diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c index 239084cf8192..26cbce135338 100644 --- a/arch/arm/mach-sunxi/mc_smp.c +++ b/arch/arm/mach-sunxi/mc_smp.c @@ -481,14 +481,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu) static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster) { u32 reg; + int gating_bit = cpu; pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu); if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS) return -EINVAL; + if (is_a83t && cpu == 0) + gating_bit = 4; + /* gate processor power */ reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster)); - reg |= PRCM_PWROFF_GATING_REG_CORE(cpu); + reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit); writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster)); udelay(20); diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 67b763fea005..e3f34815c9da 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -44,16 +44,16 @@ ENTRY(tegra_resume) cmp r6, #TEGRA20 beq 1f @ Yes /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r1, r0 + cpu_to_csr_reg r3, r0 mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r1] + ldr r1, [r2, r3] /* Clear event & intr flag */ orr r1, r1, \ #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps @ & ext flags for CPU power mgnt bic r1, r1, r0 - str r1, [r2] + str r1, [r2, r3] 1: mov32 r9, 0xc09 diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index b408fa56eb89..6922dd8d3e2d 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -370,6 +370,14 @@ _pll_m_c_x_done: pll_locked r1, r0, CLK_RESET_PLLC_BASE pll_locked r1, r0, CLK_RESET_PLLX_BASE + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 + beq 1f + ldr r1, [r0, #CLK_RESET_PLLP_BASE] + bic r1, r1, #(1<<31) @ disable PllP bypass + str r1, [r0, #CLK_RESET_PLLP_BASE] +1: + mov32 r7, TEGRA_TMRUS_BASE ldr r1, [r7] add r1, r1, #LOCK_DELAY @@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 ldr r0, [r5, #CLK_RESET_PLLP_BASE] + orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster bic r0, r0, #(1 << 30) str r0, [r5, #CLK_RESET_PLLP_BASE] ldr r0, [r5, #CLK_RESET_PLLA_BASE] diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index e512e606eabd..5ea3421fa1e8 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -106,8 +106,8 @@ static const char * const tegra_dt_board_compat[] = { }; DT_MACHINE_START(TEGRA_DT, "NVIDIA Tegra SoC (Flattened Device Tree)") - .l2c_aux_val = 0x3c400001, - .l2c_aux_mask = 0xc20fc3fe, + .l2c_aux_val = 0x3c400000, + .l2c_aux_mask = 0xc20fc3ff, .smp = smp_ops(tegra_smp_ops), .map_io = tegra_map_common_io, .init_early = tegra_init_early, diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 0f5381d13494..55bbbc3b328f 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -551,8 +551,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) static int __init ve_spc_clk_init(void) { - int cpu; + int cpu, cluster; struct clk *clk; + bool init_opp_table[MAX_CLUSTERS] = { false }; if (!info) return 0; /* Continue only if SPC is initialised */ @@ -578,8 +579,17 @@ static int __init ve_spc_clk_init(void) continue; } + cluster = topology_physical_package_id(cpu_dev->id); + if (init_opp_table[cluster]) + continue; + if (ve_init_opp_table(cpu_dev)) pr_warn("failed to initialise cpu%d opp table\n", cpu); + else if (dev_pm_opp_set_sharing_cpus(cpu_dev, + topology_core_cpumask(cpu_dev->id))) + pr_warn("failed to mark OPPs shared for cpu%d\n", cpu); + else + init_opp_table[cluster] = true; } platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6067fa4de22b..529f54d94709 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -765,6 +765,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -772,10 +802,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) unsigned long instr = 0, instrptr; int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); unsigned int type; - unsigned int fault; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + int fault; if (interrupts_enabled(regs)) local_irq_enable(); @@ -784,15 +814,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); - fault = probe_kernel_address(ptr, tinstr); - tinstr = __mem_to_opcode_thumb16(tinstr); + + fault = alignment_get_thumb(regs, ptr, &tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ - u16 tinst2 = 0; - fault = probe_kernel_address(ptr + 1, tinst2); - tinst2 = __mem_to_opcode_thumb16(tinst2); + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { @@ -801,8 +830,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address((void *)instrptr, instr); - instr = __mem_to_opcode_arm(instr); + fault = alignment_get_arm(regs, (void *)instrptr, &instr); } if (fault) { diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 1aea01ba1262..5dccc1d6a651 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -46,7 +46,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - ret = dma_alloc_from_global_coherent(size, dma_handle); + ret = dma_alloc_from_global_coherent(dev, size, dma_handle); /* * dma_alloc_from_global_coherent() may fail because: diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 13233c7917fe..e5e54a5151c9 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1197,6 +1197,9 @@ void __init adjust_lowmem_bounds(void) phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; + if (memblock_is_nomap(reg)) + continue; + if (reg->base < vmalloc_limit) { if (block_end > lowmem_limit) /* diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 5461d589a1e2..60ac7c5999a9 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -5,6 +5,7 @@ * VMA_VM_FLAGS * VM_EXEC */ +#include <linux/const.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> @@ -30,7 +31,7 @@ * act_mm - get current->active_mm */ .macro act_mm, rd - bic \rd, sp, #8128 + bic \rd, sp, #(THREAD_SIZE - 1) & ~63 bic \rd, \rd, #63 ldr \rd, [\rd, #TI_TASK] .if (TSK_ACTIVE_MM > IMM12_MASK) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd..a6554fdb56c5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -65,6 +65,9 @@ static void cpu_v7_spectre_init(void) break; #ifdef CONFIG_ARM_PSCI + case ARM_CPU_PART_BRAHMA_B53: + /* Requires no workaround */ + break; default: /* Other ARM CPUs require no workaround */ if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1448f144e7fb..1a49d503eafc 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -132,13 +132,11 @@ __v7m_setup_cont: dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP - stmia sp, {r0-r3, r12} cpsie i svc #0 1: cpsid i - ldr r0, =exc_ret - orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK - str lr, [r0] + /* Calculate exc_ret */ + orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK ldmia sp, {r0-r3, r12} str r5, [r12, #11 * 4] @ restore the original SVC vector entry mov lr, r6 @ restore LR diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index adff54c312bf..6484d0fc4c93 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -927,7 +927,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); @@ -953,7 +957,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); @@ -990,21 +998,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], arm_bpf_put_reg32(dst_hi, rd[0], ctx); } +static bool is_ldst_imm(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + case BPF_DW: + /* Need to make sure off+4 does not overflow. */ + off_max = 0xfff - 4; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s32 off_max; s8 rd; rd = arm_bpf_get_reg32(dst, tmp[1], ctx); - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; @@ -1033,18 +1055,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[], /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const s8 dst[], const s8 src, - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; - s32 off_max; - - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cf5f1dafcf74..5a785e62d882 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -539,6 +539,22 @@ config ARM64_ERRATUM_1463225 If unsure, say Y. +config ARM64_ERRATUM_1542419 + bool "Neoverse-N1: workaround mis-ordering of instruction fetches" + default y + help + This option adds a workaround for ARM Neoverse-N1 erratum + 1542419. + + Affected Neoverse-N1 cores could execute a stale instruction when + modified by another CPU. The workaround depends on a firmware + counterpart. + + Workaround the issue by hiding the DIC feature from EL0. This + forces user-space to perform cache maintenance. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -597,6 +613,31 @@ config CAVIUM_ERRATUM_30115 If unsure, say Y. +config CAVIUM_ERRATUM_36890 + bool "Cavium erratum 36890" + default y + help + Enable workaround for erratum 36890. On all ThunderX T88xx and + OcteonTX1 T81/T83 and some OcteonTX2 chips, the "dc zva" instruction + does not work all the time. This happens when there are two VAs + that match up with one PA; including when the two VAs match but have + different asids. The fix is to disable "dc zva" in userspace. + + If unsure, say Y. + +config MRVL_ERRATUM_37119 + bool "Marvell erratum 37119" + default y + help + Enable workaround for erratum 37119. On some OcteonTX2 chips, + while using the SSO workslots in userspace can cause problems + as the cache are not invalidated correctly. To workaround + this erratum, a local tlbi is placed before the eret. + This TLBI does not need to invalidate anything so a tlbi against + asid 0 is used. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y @@ -682,6 +723,14 @@ config FUJITSU_ERRATUM_010001 endmenu +config MRVL_OCTEONTX_EL0_INTR + bool "Handle interrupts in EL0 via EL3" + default y + help + Handle certain interrupts in EL0 with the help of EL3 firmware to achieve + low latency and also not break task isolation. + Generally implemented and tested on OcteonTx and its successive + generation CPUs. choice prompt "Page size" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index e9d2e578cbe6..20bd72e1b8fe 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -67,6 +67,10 @@ stack_protector_prepare: prepare0 include/generated/asm-offsets.h)) endif +# Ensure that if the compiler supports branch protection we default it +# off. +KBUILD_CFLAGS += $(call cc-option,-mbranch-protection=none) + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c506434..cd3414898d10 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts index 01a9a52edae4..393c1948a495 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts @@ -140,7 +140,7 @@ &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; - vmmc-supply = <®_aldo2>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_dldo4>; mmc-pwrseq = <&wifi_pwrseq>; bus-width = <4>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index 24f1aac366d6..d5b6e8159a33 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -63,3 +63,12 @@ reg = <1>; }; }; + +®_dc1sw { + /* + * Ethernet PHY needs 30ms to properly power up and some more + * to initialize. 100ms should be plenty of time to finish + * whole process. + */ + regulator-enable-ramp-delay = <100000>; +}; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts index 2b6345db7dc0..78c82a665c84 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts @@ -104,6 +104,7 @@ &ehci0 { phys = <&usbphy 0>; + phy-names = "usb"; status = "okay"; }; @@ -150,6 +151,7 @@ &ohci0 { phys = <&usbphy 0>; + phy-names = "usb"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index e6fb9683f213..25099202c52c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -159,6 +159,12 @@ }; ®_dc1sw { + /* + * Ethernet PHY needs 30ms to properly power up and some more + * to initialize. 100ms should be plenty of time to finish + * whole process. + */ + regulator-enable-ramp-delay = <100000>; regulator-name = "vcc-phy"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 8c5b521e6389..822f821e9d65 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -144,10 +144,10 @@ pmu { compatible = "arm,cortex-a53-pmu"; - interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; }; @@ -227,11 +227,11 @@ display_clocks: clock@0 { compatible = "allwinner,sun50i-a64-de2-clk"; - reg = <0x0 0x100000>; - clocks = <&ccu CLK_DE>, - <&ccu CLK_BUS_DE>; - clock-names = "mod", - "bus"; + reg = <0x0 0x10000>; + clocks = <&ccu CLK_BUS_DE>, + <&ccu CLK_DE>; + clock-names = "bus", + "mod"; resets = <&ccu RST_BUS_DE>; #clock-cells = <1>; #reset-cells = <1>; @@ -553,6 +553,7 @@ resets = <&ccu RST_BUS_OHCI1>, <&ccu RST_BUS_EHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -564,6 +565,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index f002a496d7cb..eaf8f83794fd 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -54,21 +54,21 @@ enable-method = "psci"; }; - cpu@1 { + cpu1: cpu@1 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <1>; enable-method = "psci"; }; - cpu@2 { + cpu2: cpu@2 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <2>; enable-method = "psci"; }; - cpu@3 { + cpu3: cpu@3 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <3>; @@ -76,6 +76,15 @@ }; }; + pmu { + compatible = "arm,cortex-a53-pmu"; + interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 16c5c3d0fd81..1d6c8566fd5e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -78,6 +78,15 @@ clock-output-names = "osc32k"; }; + pmu { + compatible = "arm,cortex-a53-pmu"; + interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; @@ -456,6 +465,7 @@ resets = <&ccu RST_BUS_OHCI3>, <&ccu RST_BUS_EHCI3>; phys = <&usb2phy 3>; + phy-names = "usb"; status = "disabled"; }; @@ -467,6 +477,7 @@ <&ccu CLK_USB_OHCI3>; resets = <&ccu RST_BUS_OHCI3>; phys = <&usb2phy 3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 470dcfd9de91..3df00f987e25 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -61,10 +61,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index 34704fecf756..0b31c6b940a1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -1141,7 +1141,7 @@ toddr_a: audio-controller@100 { compatible = "amlogic,axg-toddr"; - reg = <0x0 0x100 0x0 0x1c>; + reg = <0x0 0x100 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_A"; interrupts = <GIC_SPI 84 IRQ_TYPE_EDGE_RISING>; @@ -1152,7 +1152,7 @@ toddr_b: audio-controller@140 { compatible = "amlogic,axg-toddr"; - reg = <0x0 0x140 0x0 0x1c>; + reg = <0x0 0x140 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_B"; interrupts = <GIC_SPI 85 IRQ_TYPE_EDGE_RISING>; @@ -1163,7 +1163,7 @@ toddr_c: audio-controller@180 { compatible = "amlogic,axg-toddr"; - reg = <0x0 0x180 0x0 0x1c>; + reg = <0x0 0x180 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_C"; interrupts = <GIC_SPI 86 IRQ_TYPE_EDGE_RISING>; @@ -1174,7 +1174,7 @@ frddr_a: audio-controller@1c0 { compatible = "amlogic,axg-frddr"; - reg = <0x0 0x1c0 0x0 0x1c>; + reg = <0x0 0x1c0 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_A"; interrupts = <GIC_SPI 88 IRQ_TYPE_EDGE_RISING>; @@ -1185,7 +1185,7 @@ frddr_b: audio-controller@200 { compatible = "amlogic,axg-frddr"; - reg = <0x0 0x200 0x0 0x1c>; + reg = <0x0 0x200 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_B"; interrupts = <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>; @@ -1196,7 +1196,7 @@ frddr_c: audio-controller@240 { compatible = "amlogic,axg-frddr"; - reg = <0x0 0x240 0x0 0x1c>; + reg = <0x0 0x240 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_C"; interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>; @@ -1707,18 +1707,18 @@ }; sram: sram@fffc0000 { - compatible = "amlogic,meson-axg-sram", "mmio-sram"; + compatible = "mmio-sram"; reg = <0x0 0xfffc0000 0x0 0x20000>; #address-cells = <1>; #size-cells = <1>; ranges = <0 0x0 0xfffc0000 0x20000>; - cpu_scp_lpri: scp-shmem@13000 { + cpu_scp_lpri: scp-sram@13000 { compatible = "amlogic,meson-axg-scp-shmem"; reg = <0x13000 0x400>; }; - cpu_scp_hpri: scp-shmem@13400 { + cpu_scp_hpri: scp-sram@13400 { compatible = "amlogic,meson-axg-scp-shmem"; reg = <0x13400 0x400>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi index 4c92c197aeb8..85b3ba605a23 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi @@ -589,7 +589,7 @@ reg = <0x0 0xff400000 0x0 0x40000>; interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clkc CLKID_USB1_DDR_BRIDGE>; - clock-names = "ddr"; + clock-names = "otg"; phys = <&usb2_phy1>; phy-names = "usb2-phy"; dr_mode = "peripheral"; @@ -605,6 +605,7 @@ dr_mode = "host"; snps,dis_u2_susphy_quirk; snps,quirk-frame-length-adjustment; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6772709b9e19..0e7e844d0508 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -345,20 +345,20 @@ }; sram: sram@c8000000 { - compatible = "amlogic,meson-gx-sram", "amlogic,meson-gxbb-sram", "mmio-sram"; + compatible = "mmio-sram"; reg = <0x0 0xc8000000 0x0 0x14000>; #address-cells = <1>; #size-cells = <1>; ranges = <0 0x0 0xc8000000 0x14000>; - cpu_scp_lpri: scp-shmem@0 { - compatible = "amlogic,meson-gx-scp-shmem", "amlogic,meson-gxbb-scp-shmem"; + cpu_scp_lpri: scp-sram@0 { + compatible = "amlogic,meson-gxbb-scp-shmem"; reg = <0x13000 0x400>; }; - cpu_scp_hpri: scp-shmem@200 { - compatible = "amlogic,meson-gx-scp-shmem", "amlogic,meson-gxbb-scp-shmem"; + cpu_scp_hpri: scp-sram@200 { + compatible = "amlogic,meson-gxbb-scp-shmem"; reg = <0x13400 0x400>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 1cc9dc68ef00..52517f9520b5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -291,7 +291,7 @@ }; &usb0_phy { - status = "okay"; + status = "disabled"; phy-supply = <&usb_otg_pwr>; }; @@ -301,7 +301,7 @@ }; &usb0 { - status = "okay"; + status = "disabled"; }; &usb1 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts index 82b1c4851147..e034bbff8e66 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts @@ -9,7 +9,7 @@ #include <dt-bindings/input/input.h> -#include "meson-gxl-s905x.dtsi" +#include "meson-gxl-s805x.dtsi" / { compatible = "libretech,aml-s805x-ac", "amlogic,s805x", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts index 70433e023fda..9abfc2512b69 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts @@ -9,7 +9,7 @@ #include <dt-bindings/input/input.h> -#include "meson-gxl-s905x.dtsi" +#include "meson-gxl-s805x.dtsi" / { compatible = "amlogic,p241", "amlogic,s805x", "amlogic,meson-gxl"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi new file mode 100644 index 000000000000..f9d705648426 --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 BayLibre SAS + * Author: Neil Armstrong <narmstrong@baylibre.com> + */ + +#include "meson-gxl-s905x.dtsi" + +/ { + compatible = "amlogic,s805x", "amlogic,meson-gxl"; +}; + +/* The S805X Package doesn't seem to handle the 744MHz OPP correctly */ +&mali { + assigned-clocks = <&clkc CLKID_MALI_0_SEL>, + <&clkc CLKID_MALI_0>, + <&clkc CLKID_MALI>; /* Glitch free mux */ + assigned-clock-parents = <&clkc CLKID_FCLK_DIV3>, + <0>, /* Do Nothing */ + <&clkc CLKID_MALI_0>; + assigned-clock-rates = <0>, /* Do Nothing */ + <666666666>, + <0>; /* Do Nothing */ +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index 5499e8de5c74..2ab7d8437462 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -33,11 +33,9 @@ gpio-keys-polled { compatible = "gpio-keys-polled"; - #address-cells = <1>; - #size-cells = <0>; poll-interval = <100>; - button@0 { + power-button { label = "power"; linux,code = <KEY_POWER>; gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>; @@ -192,6 +190,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; + max-speed = <2000000>; + clocks = <&wifi32k>; + clock-names = "lpo"; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 3093ae421b17..f66a545aab51 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -291,6 +291,11 @@ }; }; +&hwrng { + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; +}; + &i2c_A { clocks = <&clkc CLKID_I2C>; }; diff --git a/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi b/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi index 15fe81738e94..dfb23dfc0b0f 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi @@ -8,7 +8,7 @@ gic: interrupt-controller@2c001000 { compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; #interrupt-cells = <3>; - #address-cells = <2>; + #address-cells = <1>; interrupt-controller; reg = <0x0 0x2c001000 0 0x1000>, <0x0 0x2c002000 0 0x2000>, diff --git a/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi b/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi index f2c75c756039..906f51935b36 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi @@ -8,9 +8,9 @@ gic: interrupt-controller@2f000000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x0 0x2f000000 0x100000>; interrupt-controller; reg = <0x0 0x2f000000 0x0 0x10000>, <0x0 0x2f100000 0x0 0x200000>, @@ -22,7 +22,7 @@ its: its@2f020000 { compatible = "arm,gic-v3-its"; msi-controller; - reg = <0x0 0x2f020000 0x0 0x20000>; + reg = <0x20000 0x20000>; }; }; }; diff --git a/arch/arm64/boot/dts/arm/foundation-v8.dtsi b/arch/arm64/boot/dts/arm/foundation-v8.dtsi index 3f78373f708a..05d1657170b4 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8.dtsi @@ -107,51 +107,51 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 63>; - interrupt-map = <0 0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <0 0 1 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <0 0 2 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <0 0 3 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <0 0 4 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <0 0 5 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <0 0 6 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <0 0 7 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <0 0 8 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <0 0 9 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, - <0 0 10 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, - <0 0 11 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, - <0 0 12 &gic 0 0 GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, - <0 0 13 &gic 0 0 GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, - <0 0 14 &gic 0 0 GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, - <0 0 15 &gic 0 0 GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, - <0 0 16 &gic 0 0 GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, - <0 0 17 &gic 0 0 GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, - <0 0 18 &gic 0 0 GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, - <0 0 19 &gic 0 0 GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, - <0 0 20 &gic 0 0 GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, - <0 0 21 &gic 0 0 GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, - <0 0 22 &gic 0 0 GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, - <0 0 23 &gic 0 0 GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, - <0 0 24 &gic 0 0 GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, - <0 0 25 &gic 0 0 GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, - <0 0 26 &gic 0 0 GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, - <0 0 27 &gic 0 0 GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, - <0 0 28 &gic 0 0 GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, - <0 0 29 &gic 0 0 GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, - <0 0 30 &gic 0 0 GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, - <0 0 31 &gic 0 0 GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, - <0 0 32 &gic 0 0 GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, - <0 0 33 &gic 0 0 GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, - <0 0 34 &gic 0 0 GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, - <0 0 35 &gic 0 0 GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, - <0 0 36 &gic 0 0 GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, - <0 0 37 &gic 0 0 GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, - <0 0 38 &gic 0 0 GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, - <0 0 39 &gic 0 0 GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, - <0 0 40 &gic 0 0 GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, - <0 0 41 &gic 0 0 GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, - <0 0 42 &gic 0 0 GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; - - ethernet@2,02000000 { + interrupt-map = <0 0 0 &gic 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic 0 GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <0 0 13 &gic 0 GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, + <0 0 14 &gic 0 GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, + <0 0 15 &gic 0 GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, + <0 0 16 &gic 0 GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, + <0 0 17 &gic 0 GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, + <0 0 18 &gic 0 GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <0 0 19 &gic 0 GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, + <0 0 20 &gic 0 GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, + <0 0 21 &gic 0 GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, + <0 0 22 &gic 0 GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <0 0 23 &gic 0 GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <0 0 24 &gic 0 GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <0 0 25 &gic 0 GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <0 0 26 &gic 0 GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <0 0 27 &gic 0 GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <0 0 28 &gic 0 GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <0 0 29 &gic 0 GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <0 0 30 &gic 0 GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <0 0 31 &gic 0 GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <0 0 32 &gic 0 GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <0 0 33 &gic 0 GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <0 0 34 &gic 0 GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <0 0 35 &gic 0 GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, + <0 0 36 &gic 0 GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, + <0 0 37 &gic 0 GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <0 0 38 &gic 0 GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <0 0 39 &gic 0 GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <0 0 40 &gic 0 GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <0 0 41 &gic 0 GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <0 0 42 &gic 0 GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; + + ethernet@202000000 { compatible = "smsc,lan91c111"; reg = <2 0x02000000 0x10000>; interrupts = <15>; @@ -178,7 +178,7 @@ clock-output-names = "v2m:refclk32khz"; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 687707020ec1..10a7b5550681 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -161,10 +161,10 @@ bus-range = <0x0 0x1>; reg = <0x0 0x40000000 0x0 0x10000000>; ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; - interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; interrupt-map-mask = <0x0 0x0 0x0 0x7>; msi-map = <0x0 &its 0x0 0x10000>; iommu-map = <0x0 &smmu 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 7446e0dc154d..24e040970190 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -6,7 +6,6 @@ /* * Devices shared by all Juno boards */ - dma-ranges = <0 0 0 0 0x100 0>; memtimer: timer@2a810000 { compatible = "arm,armv7-timer-mem"; @@ -63,35 +62,35 @@ <0x0 0x2c02f000 0 0x2000>, <0x0 0x2c04f000 0 0x2000>, <0x0 0x2c06f000 0 0x2000>; - #address-cells = <2>; + #address-cells = <1>; #interrupt-cells = <3>; - #size-cells = <2>; + #size-cells = <1>; interrupt-controller; interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_HIGH)>; - ranges = <0 0 0 0x2c1c0000 0 0x40000>; + ranges = <0 0 0x2c1c0000 0x40000>; v2m_0: v2m@0 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0 0 0x10000>; + reg = <0 0x10000>; }; v2m@10000 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0x10000 0 0x10000>; + reg = <0x10000 0x10000>; }; v2m@20000 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0x20000 0 0x10000>; + reg = <0x20000 0x10000>; }; v2m@30000 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0x30000 0 0x10000>; + reg = <0x30000 0x10000>; }; }; @@ -520,10 +519,10 @@ <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; - interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &gic 0 0 GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &gic 0 0 GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &gic 0 0 GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gic 0 GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gic 0 GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gic 0 GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gic 0 GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>; msi-parent = <&v2m_0>; status = "disabled"; iommu-map-mask = <0x0>; /* RC has no means to output PCI RID */ @@ -787,19 +786,19 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 15>; - interrupt-map = <0 0 0 &gic 0 0 GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, - <0 0 1 &gic 0 0 GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, - <0 0 2 &gic 0 0 GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>, - <0 0 3 &gic 0 0 GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>, - <0 0 4 &gic 0 0 GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>, - <0 0 5 &gic 0 0 GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, - <0 0 6 &gic 0 0 GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>, - <0 0 7 &gic 0 0 GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>, - <0 0 8 &gic 0 0 GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>, - <0 0 9 &gic 0 0 GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>, - <0 0 10 &gic 0 0 GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, - <0 0 11 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, - <0 0 12 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 &gic 0 GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic 0 GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic 0 GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic 0 GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic 0 GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic 0 GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic 0 GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic 0 GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic 0 GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic 0 GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic 0 GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>; }; site2: tlx@60000000 { @@ -809,6 +808,6 @@ ranges = <0 0 0x60000000 0x10000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0>; - interrupt-map = <0 0 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 &gic 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>; }; }; diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index e5e265dfa902..2870b5eeb198 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -8,10 +8,10 @@ */ / { /* SoC fixed clocks */ - soc_uartclk: refclk7273800hz { + soc_uartclk: refclk7372800hz { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <7273800>; + clock-frequency = <7372800>; clock-output-names = "juno:uartclk"; }; diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index 1792b074e9a3..cad3b16dba24 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -103,7 +103,7 @@ }; }; - flash@0,00000000 { + flash@0 { /* 2 * 32MiB NOR Flash memory mounted on CS0 */ compatible = "arm,vexpress-flash", "cfi-flash"; linux,part-probe = "afs"; @@ -118,7 +118,7 @@ status = "disabled"; }; - ethernet@2,00000000 { + ethernet@200000000 { compatible = "smsc,lan9118", "smsc,lan9115"; reg = <2 0x00000000 0x10000>; interrupts = <3>; @@ -131,7 +131,7 @@ vddvario-supply = <&mb_fixed_3v3>; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi index 57b0b9d7f3fa..29e6962c70bd 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi +++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi @@ -9,7 +9,7 @@ motherboard { arm,v2m-memory-map = "rs2"; - iofpga@3,00000000 { + iofpga@300000000 { virtio-p9@140000 { compatible = "virtio,mmio"; reg = <0x140000 0x200>; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi index 454cf6c44c49..768e34fc3485 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi @@ -17,14 +17,14 @@ #interrupt-cells = <1>; ranges; - flash@0,00000000 { + flash@0 { compatible = "arm,vexpress-flash", "cfi-flash"; reg = <0 0x00000000 0x04000000>, <4 0x00000000 0x04000000>; bank-width = <4>; }; - ethernet@2,02000000 { + ethernet@202000000 { compatible = "smsc,lan91c111"; reg = <2 0x02000000 0x10000>; interrupts = <15>; @@ -51,7 +51,7 @@ clock-output-names = "v2m:refclk32khz"; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi index 8a3a770e8f2c..56789ccf9454 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi @@ -42,13 +42,14 @@ pinmux: pinmux@14029c { compatible = "pinctrl-single"; - reg = <0x0014029c 0x250>; + reg = <0x0014029c 0x26c>; #address-cells = <1>; #size-cells = <1>; pinctrl-single,register-width = <32>; pinctrl-single,function-mask = <0xf>; pinctrl-single,gpio-range = < - &range 0 154 MODE_GPIO + &range 0 91 MODE_GPIO + &range 95 60 MODE_GPIO >; range: gpio-range { #pinctrl-single,gpio-range-cells = <3>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 35c4670c00d1..afea9eae8d1b 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -463,8 +463,7 @@ <&pinmux 108 16 27>, <&pinmux 135 77 6>, <&pinmux 141 67 4>, - <&pinmux 145 149 6>, - <&pinmux 151 91 4>; + <&pinmux 145 149 6>; }; i2c1: i2c@e0000 { diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi index d29d13f4694f..deae8b29edce 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi @@ -18,8 +18,8 @@ / { compatible = "samsung,exynos5433"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; interrupt-parent = <&gic>; @@ -260,7 +260,7 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges; + ranges = <0x0 0x0 0x0 0x18000000>; chipid@10000000 { compatible = "samsung,exynos4210-chipid"; diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index 00dd89b92b42..d991eae5202f 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -152,6 +152,7 @@ regulator-min-microvolt = <700000>; regulator-max-microvolt = <1150000>; regulator-enable-ramp-delay = <125>; + regulator-always-on; }; ldo8_reg: LDO8 { diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index 077d23478901..a0d55e5208e3 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -12,8 +12,8 @@ / { compatible = "samsung,exynos7"; interrupt-parent = <&gic>; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; aliases { pinctrl0 = &pinctrl_alive; @@ -87,7 +87,7 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges; + ranges = <0 0 0 0x18000000>; chipid@10000000 { compatible = "samsung,exynos4210-chipid"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts index b359068d9605..d8be59911c45 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts @@ -89,7 +89,7 @@ status = "okay"; i2c-mux@77 { - compatible = "nxp,pca9847"; + compatible = "nxp,pca9547"; reg = <0x77>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index bf7f845447ed..72946d42257d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -72,7 +72,7 @@ reboot { compatible ="syscon-reboot"; - regmap = <&dcfg>; + regmap = <&rst>; offset = <0xb0>; mask = <0x02>; }; @@ -128,7 +128,13 @@ dcfg: syscon@1e00000 { compatible = "fsl,ls1028a-dcfg", "syscon"; reg = <0x0 0x1e00000 0x0 0x10000>; - big-endian; + little-endian; + }; + + rst: syscon@1e60000 { + compatible = "syscon"; + reg = <0x0 0x1e60000 0x0 0x10000>; + little-endian; }; scfg: syscon@1fc0000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi index 6082ae022136..d237162a8744 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi @@ -20,6 +20,8 @@ }; &fman0 { + fsl,erratum-a050385; + /* these aliases provide the FMan ports mapping */ enet0: ethernet@e0000 { }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index 4223a2352d45..dde50c88f5e3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -119,12 +119,12 @@ ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts index 6a6514d0e5a9..274339759114 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts @@ -127,12 +127,12 @@ &fman0 { ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 125a8cc2c5b3..84523e535fa1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -33,7 +33,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster0_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@1 { @@ -49,7 +49,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster0_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@100 { @@ -65,7 +65,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster1_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@101 { @@ -81,7 +81,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster1_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@200 { @@ -97,7 +97,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster2_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@201 { @@ -113,7 +113,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster2_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@300 { @@ -129,7 +129,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster3_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@301 { @@ -145,7 +145,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster3_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@400 { @@ -161,7 +161,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster4_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@401 { @@ -177,7 +177,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster4_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@500 { @@ -193,7 +193,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster5_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@501 { @@ -209,7 +209,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster5_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@600 { @@ -225,7 +225,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster6_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@601 { @@ -241,7 +241,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster6_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@700 { @@ -257,7 +257,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster7_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cpu@701 { @@ -273,7 +273,7 @@ i-cache-line-size = <64>; i-cache-sets = <192>; next-level-cache = <&cluster7_l2>; - cpu-idle-states = <&cpu_pw20>; + cpu-idle-states = <&cpu_pw15>; }; cluster0_l2: l2-cache0 { @@ -340,9 +340,9 @@ cache-level = <2>; }; - cpu_pw20: cpu-pw20 { + cpu_pw15: cpu-pw15 { compatible = "arm,idle-state"; - idle-state-name = "PW20"; + idle-state-name = "PW15"; arm,psci-suspend-param = <0x0>; entry-latency-us = <2000>; exit-latency-us = <2000>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 6b407a94c06e..5a2811707d0e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -281,7 +281,7 @@ }; sdma2: dma-controller@302c0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x302c0000 0x10000>; interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA2_ROOT>, @@ -292,7 +292,7 @@ }; sdma3: dma-controller@302b0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x302b0000 0x10000>; interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA3_ROOT>, @@ -554,7 +554,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -570,7 +570,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -584,7 +584,7 @@ compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc"; reg = <0x30b60000 0x10000>; interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MM_CLK_DUMMY>, + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, <&clk IMX8MM_CLK_NAND_USDHC_BUS>, <&clk IMX8MM_CLK_USDHC3_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -597,11 +597,11 @@ }; sdma1: dma-controller@30bd0000 { - compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma"; + compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma"; reg = <0x30bd0000 0x10000>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>, - <&clk IMX8MM_CLK_SDMA1_ROOT>; + <&clk IMX8MM_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index 7a1706f969f0..c25be32ba37e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -100,9 +100,9 @@ regulator-name = "0V9_ARM"; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1000000>; - gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; - states = <1000000 0x0 - 900000 0x1>; + gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>; + states = <1000000 0x1 + 900000 0x0>; regulator-always-on; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 6632cbd88bed..73d34800ac23 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -743,7 +743,7 @@ "fsl,imx7d-usdhc"; reg = <0x30b40000 0x10000>; interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MQ_CLK_DUMMY>, + clocks = <&clk IMX8MQ_CLK_IPG_ROOT>, <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC1_ROOT>; clock-names = "ipg", "ahb", "per"; @@ -760,7 +760,7 @@ "fsl,imx7d-usdhc"; reg = <0x30b50000 0x10000>; interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MQ_CLK_DUMMY>, + clocks = <&clk IMX8MQ_CLK_IPG_ROOT>, <&clk IMX8MQ_CLK_NAND_USDHC_BUS>, <&clk IMX8MQ_CLK_USDHC2_ROOT>; clock-names = "ipg", "ahb", "per"; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts index bfdada2db176..399767d25725 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts @@ -52,11 +52,6 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0>; }; - - ethphy1: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; }; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts b/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts index e035cf195b19..8c4bfbaf3a80 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts +++ b/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts @@ -530,6 +530,17 @@ status = "ok"; compatible = "adi,adv7533"; reg = <0x39>; + adi,dsi-lanes = <4>; + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { + reg = <1>; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index c14205cd6bf5..3e47150c05ec 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -516,7 +516,7 @@ reg = <0x39>; interrupt-parent = <&gpio1>; interrupts = <1 2>; - pd-gpio = <&gpio0 4 0>; + pd-gpios = <&gpio0 4 0>; adi,dsi-lanes = <4>; #sound-dai-cells = <0>; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index e4ceb3a73c81..bf2d744185df 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -47,10 +47,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, @@ -82,7 +82,7 @@ ranges = <0 0 0 0xffffffff>; gmac0: ethernet@ff800000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff800000 0x2000>; interrupts = <0 90 4>; interrupt-names = "macirq"; @@ -97,7 +97,7 @@ }; gmac1: ethernet@ff802000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff802000 0x2000>; interrupts = <0 91 4>; interrupt-names = "macirq"; @@ -112,7 +112,7 @@ }; gmac2: ethernet@ff804000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff804000 0x2000>; interrupts = <0 92 4>; interrupt-names = "macirq"; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts index bd4aab6092e0..e31813a4f972 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts @@ -143,6 +143,7 @@ phy-mode = "sgmii"; status = "okay"; managed = "in-band-status"; + phys = <&comphy1 0>; sfp = <&sfp_eth0>; }; @@ -150,11 +151,14 @@ phy-mode = "sgmii"; status = "okay"; managed = "in-band-status"; + phys = <&comphy0 1>; sfp = <&sfp_eth1>; }; &usb3 { status = "okay"; + phys = <&usb2_utmi_otg_phy>; + phy-names = "usb2-utmi-otg-phy"; }; &uart0 { diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index 9143aa13ceb1..e88efafbda39 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -364,6 +364,7 @@ pinctrl-0 = <&cp0_copper_eth_phy_reset>; reset-gpios = <&cp0_gpio2 11 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; + reset-deassert-us = <10000>; }; switch0: switch0@4 { @@ -405,6 +406,8 @@ reg = <5>; label = "cpu"; ethernet = <&cp1_eth2>; + phy-mode = "2500base-x"; + managed = "in-band-status"; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi index 4d6e4a097f72..eeaaaf188bd4 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi @@ -423,10 +423,10 @@ CP110_LABEL(nand_controller): nand@720000 { /* - * Due to the limitation of the pins available - * this controller is only usable on the CPM - * for A7K and on the CPS for A8K. - */ + * Due to the limitation of the pins available + * this controller is only usable on the CPM + * for A7K and on the CPS for A8K. + */ compatible = "marvell,armada-8k-nand-controller", "marvell,armada370-nand-controller"; reg = <0x720000 0x54>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 15f1842f6df3..dec5e4113ce4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -238,21 +238,21 @@ cpu_on = <0x84000003>; }; - clk26m: oscillator@0 { + clk26m: oscillator0 { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <26000000>; clock-output-names = "clk26m"; }; - clk32k: oscillator@1 { + clk32k: oscillator1 { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <32000>; clock-output-names = "clk32k"; }; - cpum_ck: oscillator@2 { + cpum_ck: oscillator2 { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <0>; @@ -268,19 +268,19 @@ sustainable-power = <1500>; /* milliwatts */ trips { - threshold: trip-point@0 { + threshold: trip-point0 { temperature = <68000>; hysteresis = <2000>; type = "passive"; }; - target: trip-point@1 { + target: trip-point1 { temperature = <85000>; hysteresis = <2000>; type = "passive"; }; - cpu_crit: cpu_crit@0 { + cpu_crit: cpu_crit0 { temperature = <115000>; hysteresis = <2000>; type = "critical"; @@ -288,13 +288,13 @@ }; cooling-maps { - map@0 { + map0 { trip = <&target>; cooling-device = <&cpu0 0 0>, <&cpu1 0 0>; contribution = <3072>; }; - map@1 { + map1 { trip = <&target>; cooling-device = <&cpu2 0 0>, <&cpu3 0 0>; @@ -308,7 +308,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; - vpu_dma_reserved: vpu_dma_mem_region { + vpu_dma_reserved: vpu_dma_mem_region@b7000000 { compatible = "shared-dma-pool"; reg = <0 0xb7000000 0 0x500000>; alignment = <0x1000>; @@ -360,7 +360,7 @@ reg = <0 0x10005000 0 0x1000>; }; - pio: pinctrl@10005000 { + pio: pinctrl@1000b000 { compatible = "mediatek,mt8173-pinctrl"; reg = <0 0x1000b000 0 0x1000>; mediatek,pctl-regmap = <&syscfg_pctl_a>; @@ -567,7 +567,7 @@ status = "disabled"; }; - gic: interrupt-controller@10220000 { + gic: interrupt-controller@10221000 { compatible = "arm,gic-400"; #interrupt-cells = <3>; interrupt-parent = <&gic>; @@ -1397,8 +1397,8 @@ "venc_lt_sel"; assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>, <&topckgen CLK_TOP_VENC_LT_SEL>; - assigned-clock-parents = <&topckgen CLK_TOP_VENCPLL_D2>, - <&topckgen CLK_TOP_UNIVPLL1_D2>; + assigned-clock-parents = <&topckgen CLK_TOP_VCODECPLL>, + <&topckgen CLK_TOP_VCODECPLL_370P5>; }; vencltsys: clock-controller@19000000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi index 0fd5bd29fbf9..e56fb39e9eb2 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi @@ -32,7 +32,7 @@ phy-reset-gpios = <&gpio TEGRA194_MAIN_GPIO(G, 5) GPIO_ACTIVE_LOW>; phy-handle = <&phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; mdio { #address-cells = <1>; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index a7dc319214a4..b0095072bc28 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1612,7 +1612,7 @@ regulator-name = "VDD_HDMI_5V0"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&exp1 12 GPIO_ACTIVE_LOW>; + gpio = <&exp1 12 GPIO_ACTIVE_HIGH>; enable-active-high; vin-supply = <&vdd_5v0_sys>; }; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 04ad2fb22b9a..dba3488492f1 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -623,6 +623,8 @@ l21 { regulator-min-microvolt = <2950000>; regulator-max-microvolt = <2950000>; + regulator-allow-set-load; + regulator-system-load = <200000>; }; l22 { regulator-min-microvolt = <3300000>; diff --git a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi index 242aaea68804..1235830ffd0b 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi @@ -508,7 +508,7 @@ pins = "gpio63", "gpio64", "gpio65", "gpio66", "gpio67", "gpio68"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; cdc_pdm_lines_sus: pdm_lines_off { @@ -537,7 +537,7 @@ pins = "gpio113", "gpio114", "gpio115", "gpio116"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; @@ -565,7 +565,7 @@ pinconf { pins = "gpio110"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; @@ -591,7 +591,7 @@ pinconf { pins = "gpio116"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; ext_mclk_tlmm_lines_sus: mclk_lines_off { @@ -619,7 +619,7 @@ pins = "gpio112", "gpio117", "gpio118", "gpio119"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; ext_sec_tlmm_lines_sus: tlmm_lines_off { diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index dacd465fc62e..35a6609f5228 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -209,7 +209,7 @@ thermal-sensors = <&tsens 3>; trips { - cpu2_3_alert0: trip-point@0 { + cpu2_3_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "passive"; @@ -239,7 +239,7 @@ thermal-sensors = <&tsens 2>; trips { - gpu_alert0: trip-point@0 { + gpu_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "passive"; @@ -259,7 +259,7 @@ thermal-sensors = <&tsens 1>; trips { - cam_alert0: trip-point@0 { + cam_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "hot"; @@ -274,7 +274,7 @@ thermal-sensors = <&tsens 0>; trips { - modem_alert0: trip-point@0 { + modem_alert0: trip-point0 { temperature = <85000>; hysteresis = <2000>; type = "hot"; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 942465d8aeb7..f976221ff144 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1107,6 +1107,8 @@ interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -1137,6 +1139,8 @@ interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -1187,16 +1191,16 @@ "csi_clk_mux", "vfe0", "vfe1"; - interrupts = <GIC_SPI 78 0>, - <GIC_SPI 79 0>, - <GIC_SPI 80 0>, - <GIC_SPI 296 0>, - <GIC_SPI 297 0>, - <GIC_SPI 298 0>, - <GIC_SPI 299 0>, - <GIC_SPI 309 0>, - <GIC_SPI 314 0>, - <GIC_SPI 315 0>; + interrupts = <GIC_SPI 78 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 80 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 296 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 297 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 298 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 299 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 309 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 314 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 315 IRQ_TYPE_EDGE_RISING>; interrupt-names = "csiphy0", "csiphy1", "csiphy2", diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 574be78a936e..c57f4fe0ea9f 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -760,7 +760,7 @@ tcsr_mutex_regs: syscon@1f40000 { compatible = "syscon"; - reg = <0x1f40000 0x20000>; + reg = <0x1f40000 0x40000>; }; apcs_glb: mailbox@9820000 { diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index d987d6741e40..9c1803f7e493 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -63,6 +63,7 @@ regulator-always-on; regulator-boot-on; regulator-name = "vdd_apc"; + regulator-initial-mode = <1>; regulator-min-microvolt = <1048000>; regulator-max-microvolt = <1352000>; }; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index de282c4794ed..7180e51d389f 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -883,6 +883,8 @@ <&cpg CPG_CORE R8A774A1_CLK_CANFD>, <&can_clk>; clock-names = "clkp1", "clkp2", "can_clk"; + assigned-clocks = <&cpg CPG_CORE R8A774A1_CLK_CANFD>; + assigned-clock-rates = <40000000>; power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>; resets = <&cpg 916>; status = "disabled"; @@ -897,6 +899,8 @@ <&cpg CPG_CORE R8A774A1_CLK_CANFD>, <&can_clk>; clock-names = "clkp1", "clkp2", "can_clk"; + assigned-clocks = <&cpg CPG_CORE R8A774A1_CLK_CANFD>; + assigned-clock-rates = <40000000>; power-domains = <&sysc R8A774A1_PD_ALWAYS_ON>; resets = <&cpg 915>; status = "disabled"; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index 3f86db199dbf..1e070139be2f 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -973,6 +973,8 @@ <&cpg CPG_CORE R8A774C0_CLK_CANFD>, <&can_clk>; clock-names = "clkp1", "clkp2", "can_clk"; + assigned-clocks = <&cpg CPG_CORE R8A774C0_CLK_CANFD>; + assigned-clock-rates = <40000000>; power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>; resets = <&cpg 916>; status = "disabled"; @@ -987,6 +989,8 @@ <&cpg CPG_CORE R8A774C0_CLK_CANFD>, <&can_clk>; clock-names = "clkp1", "clkp2", "can_clk"; + assigned-clocks = <&cpg CPG_CORE R8A774C0_CLK_CANFD>; + assigned-clock-rates = <40000000>; power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>; resets = <&cpg 915>; status = "disabled"; diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 5b6164d4b8e3..d8b29753c450 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -652,7 +652,7 @@ }; pwm3: pwm@e6e33000 { - compatible = "renesas,pwm-r8a7790", "renesas,pwm-rcar"; + compatible = "renesas,pwm-r8a77970", "renesas,pwm-rcar"; reg = <0 0xe6e33000 0 8>; #pwm-cells = <2>; clocks = <&cpg CPG_MOD 523>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index a901a341dcf7..8a0bb4b6ceff 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1318,6 +1318,7 @@ ipmmu_vip0: mmu@e7b00000 { compatible = "renesas,ipmmu-r8a77980"; reg = <0 0xe7b00000 0 0x1000>; + renesas,ipmmu-main = <&ipmmu_mm 4>; power-domains = <&sysc R8A77980_PD_ALWAYS_ON>; #iommu-cells = <1>; }; @@ -1325,6 +1326,7 @@ ipmmu_vip1: mmu@e7960000 { compatible = "renesas,ipmmu-r8a77980"; reg = <0 0xe7960000 0 0x1000>; + renesas,ipmmu-main = <&ipmmu_mm 11>; power-domains = <&sysc R8A77980_PD_ALWAYS_ON>; #iommu-cells = <1>; }; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index c72772589953..6e048f45cb40 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -629,7 +629,6 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; clock-frequency = <12288000 11289600>; - clkout-lr-synchronous; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index eb992d60e6ba..9e09909a510a 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -768,7 +768,7 @@ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -783,7 +783,7 @@ interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -798,7 +798,7 @@ interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; power-domains = <&power PX30_PD_MMC_NAND>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts index 49c4b96da3d4..05265b38cc02 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts @@ -86,13 +86,13 @@ assigned-clock-rate = <50000000>; assigned-clocks = <&cru SCLK_MAC2PHY>; assigned-clock-parents = <&cru SCLK_MAC2PHY_SRC>; - + status = "okay"; }; &i2c1 { status = "okay"; - rk805: rk805@18 { + rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 7cfd5ca6cc85..fdee0115c19e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -170,7 +170,7 @@ &i2c1 { status = "okay"; - rk805: rk805@18 { + rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi index e17311e09082..216aafd90e7f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi @@ -156,7 +156,7 @@ pinctrl-0 = <&rgmii_pins>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - snps,reset-gpio = <&gpio3 RK_PB3 GPIO_ACTIVE_HIGH>; + snps,reset-gpio = <&gpio3 RK_PB3 GPIO_ACTIVE_LOW>; tx_delay = <0x10>; rx_delay = <0x10>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 2a127985ab17..d3ed8e5e770f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -94,33 +94,6 @@ }; }; -&gpu_thermal { - trips { - gpu_warm: gpu_warm { - temperature = <55000>; - hysteresis = <2000>; - type = "active"; - }; - - gpu_hot: gpu_hot { - temperature = <65000>; - hysteresis = <2000>; - type = "active"; - }; - }; - cooling-maps { - map1 { - trip = <&gpu_warm>; - cooling-device = <&fan THERMAL_NO_LIMIT 1>; - }; - - map2 { - trip = <&gpu_hot>; - cooling-device = <&fan 2 THERMAL_NO_LIMIT>; - }; - }; -}; - &pinctrl { ir { ir_rx: ir-rx { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 62ea288a1a70..45b86933c6ea 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -101,7 +101,7 @@ vcc5v0_host: vcc5v0-host-regulator { compatible = "regulator-fixed"; - gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_HIGH>; + gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_LOW>; enable-active-low; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_host_en>; @@ -157,7 +157,7 @@ phy-mode = "rgmii"; pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - snps,reset-gpio = <&gpio3 RK_PC0 GPIO_ACTIVE_HIGH>; + snps,reset-gpio = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; tx_delay = <0x10>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index 20ec7d1c25d7..f02d85ee657d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -166,7 +166,7 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; + regulator-max-microvolt = <1700000>; vin-supply = <&vcc5v0_sys>; }; }; @@ -240,8 +240,8 @@ rk808: pmic@1b { compatible = "rockchip,rk808"; reg = <0x1b>; - interrupt-parent = <&gpio1>; - interrupts = <21 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&gpio3>; + interrupts = <10 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <1>; clock-output-names = "xin32k", "rk808-clkout2"; pinctrl-names = "default"; @@ -549,7 +549,7 @@ pmic { pmic_int_l: pmic-int-l { - rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>; + rockchip,pins = <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up>; }; vsel1_gpio: vsel1-gpio { @@ -595,7 +595,6 @@ &sdmmc { bus-width = <4>; - cap-mmc-highspeed; cap-sd-highspeed; cd-gpios = <&gpio0 7 GPIO_ACTIVE_LOW>; disable-wp; @@ -607,8 +606,7 @@ &sdhci { bus-width = <8>; - mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; + mmc-hs200-1_8v; non-removable; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 89594a7276f4..a7e245d1bf8d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -410,7 +410,7 @@ reset-names = "usb3-otg"; status = "disabled"; - usbdrd_dwc3_0: dwc3 { + usbdrd_dwc3_0: usb@fe800000 { compatible = "snps,dwc3"; reg = <0x0 0xfe800000 0x0 0x100000>; interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH 0>; @@ -443,7 +443,7 @@ reset-names = "usb3-otg"; status = "disabled"; - usbdrd_dwc3_1: dwc3 { + usbdrd_dwc3_1: usb@fe900000 { compatible = "snps,dwc3"; reg = <0x0 0xfe900000 0x0 0x100000>; interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH 0>; @@ -1884,10 +1884,10 @@ gpu: gpu@ff9a0000 { compatible = "rockchip,rk3399-mali", "arm,mali-t860"; reg = <0x0 0xff9a0000 0x0 0x10000>; - interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH 0>; - interrupt-names = "gpu", "job", "mmu"; + interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH 0>, + <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH 0>, + <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH 0>; + interrupt-names = "job", "mmu", "gpu"; clocks = <&cru ACLK_GPU>; power-domains = <&power RK3399_PD_GPU>; status = "disabled"; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 21981b82e1b0..334cbf04e6bc 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -41,7 +41,7 @@ */ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>; - gic_its: gic-its@18200000 { + gic_its: gic-its@1820000 { compatible = "arm,gic-v3-its"; reg = <0x00 0x01820000 0x00 0x10000>; msi-controller; @@ -256,6 +256,7 @@ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; dma-coherent; power-domains = <&k3_pds 151>; + clocks = <&k3_clks 151 2>, <&k3_clks 151 7>; assigned-clocks = <&k3_clks 151 2>, <&k3_clks 151 7>; assigned-clock-parents = <&k3_clks 151 4>, /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */ <&k3_clks 151 9>; /* set PIPE3_TXB_CLK to CLK_12M_RC/256 (for HS only) */ @@ -295,6 +296,7 @@ interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; dma-coherent; power-domains = <&k3_pds 152>; + clocks = <&k3_clks 152 2>; assigned-clocks = <&k3_clks 152 2>; assigned-clock-parents = <&k3_clks 152 4>; /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */ diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi index 6f7d2b316ded..a409622da769 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi @@ -87,4 +87,45 @@ compatible = "ti,am654-adc", "ti,am3359-adc"; }; }; + + fss: fss@47000000 { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + ospi0: spi@47040000 { + compatible = "ti,am654-ospi", "cdns,qspi-nor"; + reg = <0x0 0x47040000 0x0 0x100>, + <0x5 0x00000000 0x1 0x0000000>; + interrupts = <GIC_SPI 552 IRQ_TYPE_LEVEL_HIGH>; + cdns,fifo-depth = <256>; + cdns,fifo-width = <4>; + cdns,trigger-address = <0x50000000>; + cdns,delay-elem-ps = <80>; + clocks = <&k3_clks 55 5>; + assigned-clocks = <&k3_clks 55 5>; + assigned-clock-parents = <&k3_clks 55 7>; + assigned-clock-rates = <166666666>; + power-domains = <&k3_pds 55>; + #address-cells = <1>; + #size-cells = <0>; + dma-coherent; + }; + + ospi1: spi@47050000 { + compatible = "ti,am654-ospi", "cdns,qspi-nor"; + reg = <0x0 0x47050000 0x0 0x100>, + <0x7 0x00000000 0x1 0x00000000>; + interrupts = <GIC_SPI 553 IRQ_TYPE_LEVEL_HIGH>; + cdns,fifo-depth = <256>; + cdns,fifo-width = <4>; + cdns,trigger-address = <0x58000000>; + clocks = <&k3_clks 55 16>; + power-domains = <&k3_pds 55>; + #address-cells = <1>; + #size-cells = <0>; + dma-coherent; + }; + }; }; diff --git a/arch/arm64/boot/dts/ti/k3-am65.dtsi b/arch/arm64/boot/dts/ti/k3-am65.dtsi index 50f4be2047a9..e49e52c7e8f5 100644 --- a/arch/arm64/boot/dts/ti/k3-am65.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65.dtsi @@ -74,7 +74,10 @@ <0x00 0x42040000 0x00 0x42040000 0x00 0x03ac2400>, <0x00 0x45100000 0x00 0x45100000 0x00 0x00c24000>, <0x00 0x46000000 0x00 0x46000000 0x00 0x00200000>, - <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>; + <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>, + <0x00 0x50000000 0x00 0x50000000 0x00 0x8000000>, + <0x05 0x00000000 0x05 0x00000000 0x01 0x0000000>, + <0x07 0x00000000 0x07 0x00000000 0x01 0x0000000>; cbass_mcu: interconnect@28380000 { compatible = "simple-bus"; @@ -85,7 +88,10 @@ <0x00 0x42040000 0x00 0x42040000 0x00 0x03ac2400>, /* WKUP */ <0x00 0x45100000 0x00 0x45100000 0x00 0x00c24000>, /* MMRs, remaining NAVSS */ <0x00 0x46000000 0x00 0x46000000 0x00 0x00200000>, /* CPSW */ - <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>; /* OSPI space 1 */ + <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>, /* OSPI space 1 */ + <0x00 0x50000000 0x00 0x50000000 0x00 0x8000000>, /* FSS OSPI0 data region 1 */ + <0x05 0x00000000 0x05 0x00000000 0x01 0x0000000>, /* FSS OSPI0 data region 3*/ + <0x07 0x00000000 0x07 0x00000000 0x01 0x0000000>; /* FSS OSPI1 data region 3*/ cbass_wakeup: interconnect@42040000 { compatible = "simple-bus"; diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts index 286393e8e357..db60fc29a424 100644 --- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts @@ -42,6 +42,22 @@ AM65X_WKUP_IOPAD(0x00e4, PIN_INPUT, 0) /* (AD6) WKUP_I2C0_SDA */ >; }; + + mcu_fss0_ospi0_pins_default: mcu-fss0-ospi0-pins_default { + pinctrl-single,pins = < + AM65X_WKUP_IOPAD(0x0000, PIN_OUTPUT, 0) /* (V1) MCU_OSPI0_CLK */ + AM65X_WKUP_IOPAD(0x0008, PIN_INPUT, 0) /* (U2) MCU_OSPI0_DQS */ + AM65X_WKUP_IOPAD(0x000c, PIN_INPUT, 0) /* (U4) MCU_OSPI0_D0 */ + AM65X_WKUP_IOPAD(0x0010, PIN_INPUT, 0) /* (U5) MCU_OSPI0_D1 */ + AM65X_WKUP_IOPAD(0x0014, PIN_INPUT, 0) /* (T2) MCU_OSPI0_D2 */ + AM65X_WKUP_IOPAD(0x0018, PIN_INPUT, 0) /* (T3) MCU_OSPI0_D3 */ + AM65X_WKUP_IOPAD(0x001c, PIN_INPUT, 0) /* (T4) MCU_OSPI0_D4 */ + AM65X_WKUP_IOPAD(0x0020, PIN_INPUT, 0) /* (T5) MCU_OSPI0_D5 */ + AM65X_WKUP_IOPAD(0x0024, PIN_INPUT, 0) /* (R2) MCU_OSPI0_D6 */ + AM65X_WKUP_IOPAD(0x0028, PIN_INPUT, 0) /* (R3) MCU_OSPI0_D7 */ + AM65X_WKUP_IOPAD(0x002c, PIN_OUTPUT, 0) /* (R4) MCU_OSPI0_CSn0 */ + >; + }; }; &main_pmx0 { @@ -247,3 +263,25 @@ ti,adc-channels = <0 1 2 3 4 5 6 7>; }; }; + +&ospi0 { + pinctrl-names = "default"; + pinctrl-0 = <&mcu_fss0_ospi0_pins_default>; + + flash@0{ + compatible = "jedec,spi-nor"; + reg = <0x0>; + spi-tx-bus-width = <1>; + spi-rx-bus-width = <8>; + spi-max-frequency = <50000000>; + spi-dqs; + cdns,tshsl-ns = <60>; + cdns,tsd2d-ns = <60>; + cdns,tchsh-ns = <60>; + cdns,tslch-ns = <60>; + cdns,read-delay = <2>; + cdns,phy-mode; + #address-cells = <1>; + #size-cells = <1>; + }; +}; diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 16c5da9be9fb..7ef982a84331 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -263,7 +263,7 @@ static int ghash_setkey(struct crypto_shash *tfm, static struct shash_alg ghash_alg[] = {{ .base.cra_name = "ghash", .base.cra_driver_name = "ghash-neon", - .base.cra_priority = 100, + .base.cra_priority = 150, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key), .base.cra_module = THIS_MODULE, diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index 895d3727c1fb..c5405e6a6db7 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index b9f8d787eea9..aa9ad4097361 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -71,13 +71,13 @@ static inline void apply_alternatives_module(void *start, size_t length) { } ALTINSTR_ENTRY(feature,cb) \ ".popsection\n" \ " .if " __stringify(cb) " == 0\n" \ - ".pushsection .altinstr_replacement, \"a\"\n" \ + ".subsection 1\n" \ "663:\n\t" \ newinstr "\n" \ "664:\n\t" \ - ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ - ".org . - (662b-661b) + (664b-663b)\n" \ + ".org . - (662b-661b) + (664b-663b)\n\t" \ + ".previous\n" \ ".else\n\t" \ "663:\n\t" \ "664:\n\t" \ @@ -107,9 +107,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } 662: .pushsection .altinstructions, "a" altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 663: \insn2 -664: .popsection +664: .previous .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) .endif @@ -150,7 +150,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .pushsection .altinstructions, "a" altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 .align 2 /* So GAS knows label 661 is suitably aligned */ 661: .endm @@ -169,9 +169,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_else 662: .if .Lasm_alt_mode==0 - .pushsection .altinstr_replacement, "ax" + .subsection 1 .else - .popsection + .previous .endif 663: .endm @@ -182,7 +182,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_endif 664: .if .Lasm_alt_mode==0 - .popsection + .previous .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) @@ -211,7 +211,7 @@ alternative_endif .macro user_alt, label, oldinstr, newinstr, cond 9999: alternative_insn "\oldinstr", "\newinstr", \cond - _ASM_EXTABLE 9999b, \label + _asm_extable 9999b, \label .endm /* diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 89e4c8b79349..ee9bdaa40532 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -109,7 +109,7 @@ static inline u32 gic_read_pmr(void) return read_sysreg_s(SYS_ICC_PMR_EL1); } -static inline void gic_write_pmr(u32 val) +static __always_inline void gic_write_pmr(u32 val) { write_sysreg_s(val, SYS_ICC_PMR_EL1); } diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a05db636981a..842bad6cba3f 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -11,6 +11,7 @@ #define CTR_L1IP_MASK 3 #define CTR_DMINLINE_SHIFT 16 #define CTR_IMINLINE_SHIFT 0 +#define CTR_IMINLINE_MASK 0xf #define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 @@ -18,7 +19,7 @@ #define CTR_DIC_SHIFT 29 #define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 1fe4467442aa..c0d07ca5a1a7 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -79,7 +79,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * IPI all online CPUs so that they undergo a context synchronization * event and are forced to refetch the new instructions. */ -#ifdef CONFIG_KGDB + /* * KGDB performs cache maintenance with interrupts disabled, so we * will deadlock trying to IPI the secondary CPUs. In theory, we can @@ -89,9 +89,9 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * the patching operation, so we don't need extra IPIs here anyway. * In which case, add a KGDB-specific bodge and return early. */ - if (kgdb_connected && irqs_disabled()) + if (in_dbg_master()) return; -#endif + kick_all_cpus_sync(); } diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index d064a50deb5f..5665a3fc14be 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -19,16 +19,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { __uint128_t tmp; u64 sum; + int n = ihl; /* we want it signed */ tmp = *(const __uint128_t *)iph; iph += 16; - ihl -= 4; + n -= 4; tmp += ((tmp >> 64) | (tmp << 64)); sum = tmp >> 64; do { sum += *(const u32 *)iph; iph += 4; - } while (--ihl); + } while (--n > 0); sum += ((sum >> 32) | (sum << 32)); return csum_fold((__force u32)(sum >> 32)); diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index fb8ad4616b3b..eab4f4946d6c 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -5,6 +5,9 @@ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H #ifdef __KERNEL__ + +#include <asm-generic/compat.h> + #ifdef CONFIG_COMPAT /* @@ -14,8 +17,6 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> -#include <asm-generic/compat.h> - #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ #define COMPAT_UTS_MACHINE "armv8b\0\0" diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..3886b8f1f9dc 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,10 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_CAVIUM_36890 45 +#define ARM64_WORKAROUND_MRVL_37119 46 +#define ARM64_WORKAROUND_1542419 47 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 48 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0a61344ab243..731f2c4161b9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -609,7 +609,7 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF)); } -static inline bool system_uses_irq_prio_masking(void) +static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index b1454d117cd2..f04cdf8a41fa 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,10 @@ #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +#define MRVL_CPU_PART_OCTEONTX2_96XX 0x0B2 +#define MRVL_CPU_PART_OCTEONTX2_95XX 0x0B3 + +#define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 @@ -105,6 +109,9 @@ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_MRVL_OCTEONTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_96XX) +#define MIDR_MRVL_OCTEONTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_95XX) +#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 9c0e0178ea29..c9db3a5704a9 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -32,7 +32,7 @@ static inline void local_daif_mask(void) trace_hardirqs_off(); } -static inline unsigned long local_daif_save(void) +static inline unsigned long local_daif_save_flags(void) { unsigned long flags; @@ -44,6 +44,15 @@ static inline unsigned long local_daif_save(void) flags |= PSR_I_BIT; } + return flags; +} + +static inline unsigned long local_daif_save(void) +{ + unsigned long flags; + + flags = local_daif_save_flags(); + local_daif_mask(); return flags; diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index d8ec5bb881c2..6c289542cee5 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -111,6 +111,8 @@ void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task); void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 897029c8e9b5..b6a2c352f4c3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -37,8 +37,6 @@ struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); extern void fpsimd_load_state(struct user_fpsimd_state *state); -extern void fpsimd_save(void); - extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_flush_cpu_state(void); -extern void sve_flush_cpu_state(void); +extern void fpsimd_save_and_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b2b0c6405eb0..882de50937e9 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -6,6 +6,15 @@ #include <asm-generic/irq.h> +/* Platforms with multiple SR-IOV capable PCI devices will + * need large number of MSIX vectors, hence keep this number + * fairly high. + */ +#ifdef CONFIG_PCI_MSI +#undef NR_IRQS +#define NR_IRQS 65536 +#endif + struct pt_regs; static inline int nr_legacy_irqs(void) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 034dadec7168..53bc52270236 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -97,12 +97,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) -{ - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; @@ -204,6 +198,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -263,6 +289,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9f19c354b165..5ecf3ea2ede4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -392,8 +392,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); * CP14 and CP15 live in the same array, as they are backed by the * same system registers. */ -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) +#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 02b5c48fd467..b204501a0c39 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -10,13 +10,11 @@ #include <linux/kvm_host.h> #include <asm/kvm_arm.h> -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Witdth of the register accessed by the faulting instruction is 64-bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 7ed0adb187a8..d6f46d8a72be 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -248,6 +248,12 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); +#ifdef CONFIG_MRVL_OCTEONTX_EL0_INTR +int lock_context(struct mm_struct *mm, int index); +int unlock_context_by_index(int index); +bool unlock_context_by_mm(struct mm_struct *mm); +#endif + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index c81583be034b..3b3c5e4f8e61 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -32,11 +32,11 @@ #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -54,7 +54,7 @@ #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) #define PAGE_S2_MEMATTR(attr) \ ({ \ @@ -80,17 +80,17 @@ #define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -99,7 +99,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4e07aa514f60..0edd9a0e945f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -95,12 +95,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ @@ -116,8 +112,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. */ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) @@ -264,23 +260,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t pte_a, pte_t pte_b) -{ - pteval_t lhs, rhs; - - lhs = pte_val(pte_a); - rhs = pte_val(pte_b); - - if (pte_present(pte_a)) - lhs &= ~PTE_RDONLY; - - if (pte_present(pte_b)) - rhs &= ~PTE_RDONLY; - - return (lhs == rhs); -} - /* * Huge pte definitions. */ @@ -434,6 +413,7 @@ extern pgd_t init_pg_dir[PTRS_PER_PGD]; extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_end[]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index d328540cb85e..a7ac50fb5abc 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -3,7 +3,6 @@ #define __ASM_POINTER_AUTH_H #include <linux/bitops.h> -#include <linux/random.h> #include <asm/cpufeature.h> #include <asm/memory.h> @@ -30,6 +29,13 @@ struct ptrauth_keys { struct ptrauth_key apga; }; +/* + * Only include random.h once ptrauth_keys_* structures are defined + * to avoid yet another circular include hell (random.h * ends up + * including asm/smp.h, which requires ptrauth_keys_kernel). + */ +#include <linux/random.h> + static inline void ptrauth_keys_init(struct ptrauth_keys *keys) { if (system_supports_address_auth()) { diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 701eaa738187..1dc2f99e6508 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 7e245b9e03a5..7434844036d3 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,9 +12,9 @@ #include <linux/preempt.h> #include <linux/types.h> -#ifdef CONFIG_KERNEL_MODE_NEON +DECLARE_PER_CPU(bool, fpsimd_context_busy); -DECLARE_PER_CPU(bool, kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * kernel_neon_busy is only set while preemption is disabled, + * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy * cannot change under our feet -- if it's set we cannot be * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(kernel_neon_busy); + !this_cpu_read(fpsimd_context_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 65299a2dcf9c..cfc0672013f6 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -34,6 +34,10 @@ static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + error = sign_extend64(error, 31); + return IS_ERR_VALUE(error) ? error : 0; } @@ -47,7 +51,13 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->regs[0] = (long) error ? error : val; + if (error) + val = error; + + if (is_compat_thread(task_thread_info(task))) + val = lower_32_bits(val); + + regs->regs[0] = val; } #define SYSCALL_MAX_ARGS 6 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd7f7ce1a56a..8692ee6b56f3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -49,7 +49,9 @@ #ifndef CONFIG_BROKEN_GAS_INST #ifdef __ASSEMBLY__ -#define __emit_inst(x) .inst (x) +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. +#define __emit_inst(x) .inst(x) #else #define __emit_inst(x) ".inst " __stringify((x)) "\n\t" #endif diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 2372e97db29c..90441da764ab 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -100,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index e932284993d4..b1296c839a3e 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 3a58e9db5cfe..a100483b47c4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) return err; - current_flags = arch_local_save_flags(); + current_flags = local_daif_save_flags(); /* * SEA can interrupt SError, mask it and describe this as an NMI so diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d1757ef1b1e7..73039949b5ce 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -43,20 +43,8 @@ bool alternative_is_applied(u16 cpufeature) */ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { - unsigned long replptr; - - if (kernel_text_address(pc)) - return true; - - replptr = (unsigned long)ALT_REPL_PTR(alt); - if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return false; - - /* - * Branching into *another* alternate sequence is doomed, and - * we're not even trying to fix it up. - */ - BUG(); + unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); + return !(pc >= replptr && pc <= (replptr + alt->alt_len)); } #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 2ec09debc2bb..bcb14d11232f 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -174,6 +174,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops) struct insn_emulation *insn; insn = kzalloc(sizeof(*insn), GFP_KERNEL); + if (!insn) + return; + insn->ops = ops; insn->min = INSN_UNDEF; @@ -233,6 +236,8 @@ static void __init register_insn_emulation_sysctl(void) insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl), GFP_KERNEL); + if (!insns_sysctl) + return; raw_spin_lock_irqsave(&insn_emulation_lock, flags); list_for_each_entry(insn, &insn_emulation, node) { @@ -596,7 +601,7 @@ static struct undef_hook setend_hooks[] = { }, { /* Thumb mode */ - .instr_mask = 0x0000fff7, + .instr_mask = 0xfffffff7, .instr_val = 0x0000b650, .pstate_mask = (PSR_AA32_T_BIT | PSR_AA32_MODE_MASK), .pstate_val = (PSR_AA32_T_BIT | PSR_AA32_MODE_USR), diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ca11ff7bf55e..2ce5da59778d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -87,16 +87,41 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, } static void -cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) { u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + bool enable_uct_trap = false; /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ if ((read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask)) + enable_uct_trap = true; + + /* ... or if the system is affected by an erratum */ + if (cap->capability == ARM64_WORKAROUND_1542419) + enable_uct_trap = true; + + if (enable_uct_trap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } +#ifdef CONFIG_CAVIUM_ERRATUM_36890 +static void +cpu_enable_trap_zva_access(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Clear SCTLR_EL2.DZE or SCTLR_EL1.DZE depending + * on if we are in EL2. + */ + if (!is_kernel_in_hyp_mode()) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_DZE, 0); + else + sysreg_clear_set(sctlr_el2, SCTLR_EL1_DZE, 0); + + return; +} +#endif + atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); #include <asm/mmu_context.h> @@ -128,8 +153,8 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, int cpu, slot = -1; /* - * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs - * start/end if we're a guest. Skip the hyp-vectors work. + * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if + * we're a guest. Skip the hyp-vectors work. */ if (!hyp_vecs_start) { __this_cpu_write(bp_hardening_data.fn, fn); @@ -488,6 +513,7 @@ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), {}, }; @@ -561,6 +587,8 @@ static const struct midr_range spectre_v2_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), { /* sentinel */ } }; @@ -612,6 +640,18 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static bool __maybe_unused +has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, + int scope) +{ + u32 midr = read_cpuid_id(); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range(midr, &range) && has_dic; +} + #ifdef CONFIG_HARDEN_EL2_VECTORS static const struct midr_range arm64_harden_el2_vectors[] = { @@ -623,17 +663,23 @@ static const struct midr_range arm64_harden_el2_vectors[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI - -static const struct midr_range arm64_repeat_tlbi_cpus[] = { +static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0), + { + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0) + }, + { + .midr_range.model = MIDR_QCOM_KRYO, + .matches = is_kryo_midr, + }, #endif #ifdef CONFIG_ARM64_ERRATUM_1286807 - MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + { + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + }, #endif {}, }; - #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -658,6 +704,32 @@ static const struct midr_range cavium_erratum_30115_cpus[] = { }; #endif +#ifdef CONFIG_CAVIUM_ERRATUM_36890 +static const struct midr_range cavium_erratum_36890_cpus[] = { + /* Cavium ThunderX, T88 all passes */ + MIDR_ALL_VERSIONS(MIDR_THUNDERX), + /* Cavium ThunderX, T81 all passes */ + MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX), + /* Cavium ThunderX, T83 all passes */ + MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX), + /* Marvell OcteonTX 2, 96xx pass A0, A1, and B0 */ + MIDR_RANGE(MIDR_MRVL_OCTEONTX2_96XX, 0, 0, 1, 0), + /* Marvell OcteonTX 2, 95 pass A0/A1 */ + MIDR_RANGE(MIDR_MRVL_OCTEONTX2_95XX, 0, 0, 0, 1), + {}, +}; +#endif + +#ifdef CONFIG_MRVL_ERRATUM_37119 +static const struct midr_range mrvl_erratum_37119_cpus[] = { + /* Marvell OcteonTX 2, 96xx pass A0, A1, and B0 */ + MIDR_RANGE(MIDR_MRVL_OCTEONTX2_96XX, 0, 0, 1, 0), + /* Marvell OcteonTX 2, 95 pass A0/A1 */ + MIDR_RANGE(MIDR_MRVL_OCTEONTX2_95XX, 0, 0, 0, 1), + {}, +}; +#endif + #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 static const struct arm64_cpu_capabilities qcom_erratum_1003_list[] = { { @@ -701,6 +773,33 @@ static const struct midr_range erratum_1418040_list[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_845719 +static const struct midr_range erratum_845719_list[] = { + /* Cortex-A53 r0p[01234] */ + MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + /* Brahma-B53 r0p[0] */ + MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + {}, +}; +#endif + +#ifdef CONFIG_ARM64_ERRATUM_843419 +static const struct arm64_cpu_capabilities erratum_843419_list[] = { + { + /* Cortex-A53 r0p[01234] */ + .matches = is_affected_midr_range, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + MIDR_FIXED(0x4, BIT(8)), + }, + { + /* Brahma-B53 r0p[0] */ + .matches = is_affected_midr_range, + ERRATA_MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + }, + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -732,19 +831,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_843419 { - /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 843419", .capability = ARM64_WORKAROUND_843419, - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), - MIDR_FIXED(0x4, BIT(8)), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, + .match_list = erratum_843419_list, }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 { - /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + ERRATA_MIDR_RANGE_LIST(erratum_845719_list), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 @@ -769,6 +867,21 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(cavium_erratum_30115_cpus), }, #endif +#ifdef CONFIG_CAVIUM_ERRATUM_36890 + { + .desc = "Cavium erratum 36890", + .capability = ARM64_WORKAROUND_CAVIUM_36890, + ERRATA_MIDR_RANGE_LIST(cavium_erratum_36890_cpus), + .cpu_enable = cpu_enable_trap_zva_access, + }, +#endif +#ifdef CONFIG_MRVL_ERRATUM_37119 + { + .desc = "Marvell erratum 37119", + .capability = ARM64_WORKAROUND_MRVL_37119, + ERRATA_MIDR_RANGE_LIST(mrvl_erratum_37119_cpus), + }, +#endif { .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, @@ -780,6 +893,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor/Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, .match_list = qcom_erratum_1003_list, }, @@ -788,7 +902,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm erratum 1009, ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, + .match_list = arm64_repeat_tlbi_list, }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 @@ -841,6 +957,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_cortex_a76_erratum_1463225, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1542419 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1542419 (kernel portion)", + .capability = ARM64_WORKAROUND_1542419, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_neoverse_n1_erratum_1542419, + .cpu_enable = cpu_enable_trap_ctr_access, + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d3fbb89a31e5..7bbea1b179f8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -32,9 +32,7 @@ static unsigned long elf_hwcap __read_mostly; #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -175,11 +173,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1361,7 +1364,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1589,6 +1592,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .match_list = list, \ } +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ + } + #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { @@ -1660,8 +1669,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 48222a4760c2..d64a3c1e1b6b 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -141,17 +141,20 @@ postcore_initcall(debug_monitors_init); /* * Single step API and exception handling. */ -static void set_regs_spsr_ss(struct pt_regs *regs) +static void set_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate |= DBG_SPSR_SS; } -NOKPROBE_SYMBOL(set_regs_spsr_ss); +NOKPROBE_SYMBOL(set_user_regs_spsr_ss); -static void clear_regs_spsr_ss(struct pt_regs *regs) +static void clear_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate &= ~DBG_SPSR_SS; } -NOKPROBE_SYMBOL(clear_regs_spsr_ss); +NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); + +#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) +#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) static DEFINE_SPINLOCK(debug_hook_lock); static LIST_HEAD(user_step_hook); @@ -393,17 +396,26 @@ void user_rewind_single_step(struct task_struct *task) * If single step is active for this thread, then set SPSR.SS * to 1 to avoid returning to the active-pending state. */ - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) clear_regs_spsr_ss(task_pt_regs(task)); } +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) + set_user_regs_spsr_ss(regs); + else + clear_user_regs_spsr_ss(regs); +} + /* Kernel API */ void kernel_enable_single_step(struct pt_regs *regs) { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84a822748c84..9e3c33f56312 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -364,6 +364,9 @@ alternative_else_nop_endif ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp +#ifdef CONFIG_MRVL_ERRATUM_37119 +alternative_insn nop, "tlbi aside1, xzr", ARM64_WORKAROUND_MRVL_37119 +#endif .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -775,6 +778,7 @@ el0_sync_compat: b.ge el0_dbg b el0_inv el0_svc_compat: + gic_prio_kentry_setup tmp=x1 mov x0, sp bl el0_svc_compat_handler b ret_to_user diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0cfcf5c237c5..eef3dc2488e2 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -82,7 +82,8 @@ * To prevent this from racing with the manipulation of the task's FPSIMD state * from task context and thereby corrupting the state, it is necessary to * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE - * flag with local_bh_disable() unless softirqs are already masked. + * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to + * run but prevent them to use FPSIMD. * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_cpu field @@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +DEFINE_PER_CPU(bool, fpsimd_context_busy); +EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); + +static void __get_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, true); + + WARN_ON(busy); +} + +/* + * Claim ownership of the CPU FPSIMD context for use by the calling context. + * + * The caller may freely manipulate the FPSIMD context metadata until + * put_cpu_fpsimd_context() is called. + * + * The double-underscore version must only be called if you know the task + * can't be preempted. + */ +static void get_cpu_fpsimd_context(void) +{ + preempt_disable(); + __get_cpu_fpsimd_context(); +} + +static void __put_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); + + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ +} + +/* + * Release the CPU FPSIMD context. + * + * Must be called from a context in which get_cpu_fpsimd_context() was + * previously called, with no call to put_cpu_fpsimd_context() in the + * meantime. + */ +static void put_cpu_fpsimd_context(void) +{ + __put_cpu_fpsimd_context(); + preempt_enable(); +} + +static bool have_cpu_fpsimd_context(void) +{ + return !preemptible() && __this_cpu_read(fpsimd_context_busy); +} + /* * Call __sve_free() directly only if you know task can't be scheduled * or preempted. @@ -215,12 +266,11 @@ static void sve_free(struct task_struct *task) * This function should be called only when the FPSIMD/SVE state in * thread_struct is known to be up to date, when preparing to enter * userspace. - * - * Softirqs (and preemption) must be disabled. */ static void task_fpsimd_load(void) { - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!system_supports_fpsimd()); + WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), @@ -233,16 +283,15 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to * date with respect to the CPU registers. - * - * Softirqs (and preemption) must be disabled. */ -void fpsimd_save(void) +static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!system_supports_fpsimd()); + WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (system_supports_sve() && test_thread_flag(TIF_SVE)) { @@ -289,7 +338,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) return sve_vl_from_vq(__bit_to_vq(bit)); } -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -335,9 +384,9 @@ static int __init sve_sysctl_init(void) return 0; } -#else /* ! CONFIG_SYSCTL */ +#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ static int __init sve_sysctl_init(void) { return 0; } -#endif /* ! CONFIG_SYSCTL */ +#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) @@ -364,7 +413,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) * task->thread.sve_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.uw.fpsimd_state must be up to date before calling this @@ -393,7 +443,8 @@ static void fpsimd_to_sve(struct task_struct *task) * task->thread.uw.fpsimd_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. @@ -557,7 +608,7 @@ int sve_set_vector_length(struct task_struct *task, * non-SVE thread. */ if (task == current) { - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); } @@ -567,7 +618,7 @@ int sve_set_vector_length(struct task_struct *task, sve_to_fpsimd(task); if (task == current) - local_bh_enable(); + put_cpu_fpsimd_context(); /* * Force reallocation of task SVE state to the correct size @@ -880,7 +931,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) sve_alloc(current); - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); @@ -891,7 +942,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -935,6 +986,8 @@ void fpsimd_thread_switch(struct task_struct *next) if (!system_supports_fpsimd()) return; + __get_cpu_fpsimd_context(); + /* Save unsaved fpsimd state, if any: */ fpsimd_save(); @@ -949,6 +1002,8 @@ void fpsimd_thread_switch(struct task_struct *next) update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, wrong_task || wrong_cpu); + + __put_cpu_fpsimd_context(); } void fpsimd_flush_thread(void) @@ -958,7 +1013,7 @@ void fpsimd_flush_thread(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_flush_task_state(current); memset(¤t->thread.uw.fpsimd_state, 0, @@ -999,7 +1054,7 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1011,9 +1066,9 @@ void fpsimd_preserve_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1030,13 +1085,15 @@ void fpsimd_signal_preserve_current_state(void) /* * Associate current's FPSIMD context with this cpu - * Preemption must be disabled when calling this function. + * The caller must have ownership of the cpu FPSIMD context before calling + * this function. */ void fpsimd_bind_task_to_cpu(void) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; @@ -1059,6 +1116,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; @@ -1073,17 +1131,28 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } - local_bh_disable(); + get_cpu_fpsimd_context(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); fpsimd_bind_task_to_cpu(); } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1093,10 +1162,10 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; - local_bh_disable(); + get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -1107,7 +1176,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) clear_thread_flag(TIF_FOREIGN_FPSTATE); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1124,7 +1193,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; - + /* + * If we don't support fpsimd, bail out after we have + * reset the fpsimd_cpu for this task and clear the + * FPSTATE. + */ + if (!system_supports_fpsimd()) + return; barrier(); set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE); @@ -1133,18 +1208,32 @@ void fpsimd_flush_task_state(struct task_struct *t) /* * Invalidate any task's FPSIMD state that is present on this cpu. - * This function must be called with softirqs disabled. + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() + * before calling this function. */ -void fpsimd_flush_cpu_state(void) +static void fpsimd_flush_cpu_state(void) { + WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } -#ifdef CONFIG_KERNEL_MODE_NEON +/* + * Save the FPSIMD state to memory and invalidate cpu view. + * This function must be called with preemption disabled. + */ +void fpsimd_save_and_flush_cpu_state(void) +{ + if (!system_supports_fpsimd()) + return; + WARN_ON(preemptible()); + __get_cpu_fpsimd_context(); + fpsimd_save(); + fpsimd_flush_cpu_state(); + __put_cpu_fpsimd_context(); +} -DEFINE_PER_CPU(bool, kernel_neon_busy); -EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * Kernel-side NEON support functions @@ -1170,19 +1259,13 @@ void kernel_neon_begin(void) BUG_ON(!may_use_simd()); - local_bh_disable(); - - __this_cpu_write(kernel_neon_busy, true); + get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); - - preempt_disable(); - - local_bh_enable(); } EXPORT_SYMBOL(kernel_neon_begin); @@ -1197,15 +1280,10 @@ EXPORT_SYMBOL(kernel_neon_begin); */ void kernel_neon_end(void) { - bool busy; - if (!system_supports_fpsimd()) return; - busy = __this_cpu_xchg(kernel_neon_busy, false); - WARN_ON(!busy); /* No matching kernel_neon_begin()? */ - - preempt_enable(); + put_cpu_fpsimd_context(); } EXPORT_SYMBOL(kernel_neon_end); @@ -1297,8 +1375,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); break; case CPU_PM_EXIT: break; diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 171773257974..06e56b470315 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -121,10 +121,16 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. + * fetch before we patch in the branch. Although the + * architecture doesn't require an IPI in this case, + * Neoverse-N1 erratum #1542419 does require one + * if the TLB maintenance in module_enable_ro() is + * skipped due to rodata_enabled. It doesn't seem worth + * it to make it conditional given that this is + * certainly not a fast-path. */ - __flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); + flush_icache_range((unsigned long)&dst[0], + (unsigned long)&dst[1]); } addr = (unsigned long)dst; #else /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2cdacd1c141b..191ad192a741 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -391,13 +391,19 @@ __create_page_tables: /* * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. + * accesses (MMU disabled), invalidate those tables again to + * remove any speculatively loaded cache lines. */ + dmb sy + adrp x0, idmap_pg_dir + adrp x1, idmap_pg_end + sub x1, x1, x0 + bl __inval_dcache_area + + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 - dmb sy bl __inval_dcache_area ret x28 diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 67b3bae50b92..29eb4636d095 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -726,6 +726,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, return 0; } +static int watchpoint_report(struct perf_event *wp, unsigned long addr, + struct pt_regs *regs) +{ + int step = is_default_overflow_handler(wp); + struct arch_hw_breakpoint *info = counter_arch_bp(wp); + + info->trigger = addr; + + /* + * If we triggered a user watchpoint from a uaccess routine, then + * handle the stepping ourselves since userspace really can't help + * us with this. + */ + if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0) + step = 1; + else + perf_bp_event(wp, regs); + + return step; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -735,7 +756,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; - struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); @@ -773,25 +793,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, if (dist != 0) continue; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; + step = watchpoint_report(wp, addr, regs); } - if (min_dist > 0 && min_dist != -1) { - /* No exact match found. */ - wp = slots[closest_match]; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; - } + /* No exact match found? */ + if (min_dist > 0 && min_dist != -1) + step = watchpoint_report(slots[closest_match], addr, regs); + rcu_read_unlock(); if (!step) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 84b059ed04fc..63b511735e5c 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1508,16 +1508,10 @@ static u32 aarch64_encode_immediate(u64 imm, u32 insn) { unsigned int immr, imms, n, ones, ror, esz, tmp; - u64 mask = ~0UL; - - /* Can't encode full zeroes or full ones */ - if (!imm || !~imm) - return AARCH64_BREAK_FAULT; + u64 mask; switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - if (upper_32_bits(imm)) - return AARCH64_BREAK_FAULT; esz = 32; break; case AARCH64_INSN_VARIANT_64BIT: @@ -1529,6 +1523,12 @@ static u32 aarch64_encode_immediate(u64 imm, return AARCH64_BREAK_FAULT; } + mask = GENMASK(esz - 1, 0); + + /* Can't encode full zeroes, full ones, or value wider than the mask */ + if (!imm || imm == mask || imm & ~mask) + return AARCH64_BREAK_FAULT; + /* * Inverse of Replicate(). Try to spot a repeating pattern * with a pow2 stride. diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 43119922341f..1a157ca33262 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -252,7 +252,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) if (!kgdb_single_step) return DBG_HOOK_ERROR; - kgdb_handle_exception(1, SIGTRAP, 0, regs); + kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 0df8493624e0..cc049ff5c6a5 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -189,6 +189,7 @@ void machine_kexec(struct kimage *kimage) * the offline CPUs. Therefore, we must use the __* variant here. */ __flush_icache_range((uintptr_t)reboot_code_buffer, + (uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 96e90e270042..1439cff99a28 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -154,7 +154,7 @@ armv8pmu_events_sysfs_show(struct device *dev, pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%03llx\n", pmu_attr->id); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); } #define ARMV8_EVENT_ATTR_RESOLVE(m) #m @@ -303,10 +303,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj, test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) return attr->mode; - pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; - if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && - test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap)) - return attr->mode; + if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { + u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; + + if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(id, cpu_pmu->pmceid_ext_bitmap)) + return attr->mode; + } return 0; } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 0bbac612146e..7c6ae5cf0365 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return 0; /* - * Compat (i.e. 32 bit) mode: - * - PC has been set in the pt_regs struct in kernel_entry, - * - Handle SP and LR here. + * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but + * we're stuck with it for ABI compatability reasons. + * + * For a 32-bit consumer inspecting a 32-bit task, then it will look at + * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). + * These correspond directly to a prefix of the registers saved in our + * 'struct pt_regs', with the exception of the PC, so we copy that down + * (x15 corresponds to SP_hyp in the architecture). + * + * So far, so good. + * + * The oddity arises when a 64-bit consumer looks at a 32-bit task and + * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return + * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and + * PC registers would normally live. The initial idea was to allow a + * 64-bit unwinder to unwind a 32-bit task and, although it's not clear + * how well that works in practice, somebody might be relying on it. + * + * At the time we make a sample, we don't know whether the consumer is + * 32-bit or 64-bit, so we have to cater for both possibilities. */ if (compat_user_mode(regs)) { if ((u32)idx == PERF_REG_ARM64_SP) return regs->compat_sp; if ((u32)idx == PERF_REG_ARM64_LR) return regs->compat_lr; + if (idx == 15) + return regs->pc; } if ((u32)idx == PERF_REG_ARM64_SP) @@ -32,6 +51,10 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) if ((u32)idx == PERF_REG_ARM64_PC) return regs->pc; + if (perf_reg_abi(current) == PERF_SAMPLE_REGS_ABI_32 + && idx == 15) + return regs->pc; + return regs->regs[idx]; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 398601b29627..c6d7a2912e30 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -455,6 +455,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 85ee7d07889e..ea2d82bd93d8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -76,7 +76,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -86,16 +87,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times. */ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index da2441d7b066..49bc065cf3f0 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -615,6 +615,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -637,6 +644,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -676,6 +686,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, { int ret; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); if (ret) return ret; @@ -1134,6 +1147,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -1348,6 +1362,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; if (target == current) @@ -1381,6 +1398,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1438,6 +1458,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, @@ -1798,12 +1819,23 @@ static void tracehook_report_syscall(struct pt_regs *regs, saved_reg = regs->regs[regno]; regs->regs[regno] = dir; - if (dir == PTRACE_SYSCALL_EXIT) + if (dir == PTRACE_SYSCALL_ENTER) { + if (tracehook_report_syscall_entry(regs)) + forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else if (!test_thread_flag(TIF_SINGLESTEP)) { tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) - forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else { + regs->regs[regno] = saved_reg; - regs->regs[regno] = saved_reg; + /* + * Signal a pseudo-step exception since we are stepping but + * tracer modifications to the registers may have rewound the + * state machine. + */ + tracehook_report_syscall_exit(regs, 1); + } } int syscall_trace_enter(struct pt_regs *regs) @@ -1826,12 +1858,14 @@ int syscall_trace_enter(struct pt_regs *regs) void syscall_trace_exit(struct pt_regs *regs) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + audit_syscall_exit(regs); - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + if (flags & _TIF_SYSCALL_TRACEPOINT) trace_sys_exit(regs, regs_return_value(regs)); - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); rseq_syscall(regs); @@ -1909,8 +1943,8 @@ static int valid_native_regs(struct user_pt_regs *regs) */ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) - regs->pstate &= ~DBG_SPSR_SS; + /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */ + user_regs_reset_single_step(regs, task); if (is_compat_thread(task_thread_info(task))) return valid_compat_regs(regs); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index dd2cdc0d5be2..ddb757b2c3e5 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -782,7 +782,6 @@ static void setup_restart_syscall(struct pt_regs *regs) */ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { - struct task_struct *tsk = current; sigset_t *oldset = sigmask_to_save(); int usig = ksig->sig; int ret; @@ -806,14 +805,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) */ ret |= !valid_user_regs(®s->user_regs, current); - /* - * Fast forward the stepping logic so we step into the signal - * handler. - */ - if (!ret) - user_fastforward_single_step(tsk); - - signal_setup_done(ret, ksig, 0); + /* Step into the signal handler if we are stepping */ + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); } /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a1aed6a1b8da..538b4ba66f5f 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -959,11 +959,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + void smp_send_stop(void) { unsigned long timeout; - if (num_online_cpus() > 1) { + if (num_other_online_cpus()) { cpumask_t mask; cpumask_copy(&mask, cpu_online_mask); @@ -976,10 +987,10 @@ void smp_send_stop(void) /* Wait up to one second for other CPUs to stop */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (num_other_online_cpus() && timeout--) udelay(1); - if (num_online_cpus() > 1) + if (num_other_online_cpus()) pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(cpu_online_mask)); @@ -1002,7 +1013,11 @@ void crash_smp_send_stop(void) cpus_stopped = 1; - if (num_online_cpus() == 1) { + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) { sdei_mask_local_cpu(); return; } @@ -1010,7 +1025,7 @@ void crash_smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_CRASH_STOP); diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index f1cb64959427..3c18c2454089 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -8,6 +8,7 @@ */ #include <linux/compat.h> +#include <linux/cpufeature.h> #include <linux/personality.h> #include <linux/sched.h> #include <linux/sched/signal.h> @@ -17,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/system_misc.h> +#include <asm/tlbflush.h> #include <asm/unistd.h> static long @@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end) if (fatal_signal_pending(current)) return 0; + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* + * The workaround requires an inner-shareable tlbi. + * We pick the reserved-ASID to minimise the impact. + */ + __tlbi(aside1is, __TLBI_VADDR(0, 0)); + dsb(ish); + } + ret = __flush_cache_user_range(start, start + chunk); if (ret) return ret; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 871c739f060a..1457a0ba83db 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -50,6 +50,9 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } + if (is_compat_task()) + ret = lower_32_bits(ret); + regs->regs[0] = ret; } @@ -121,7 +124,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags)) { + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { /* * We're off to userspace, where interrupts are * always enabled after we restore the flags from diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b6706a886037..7788e4ea131b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -480,6 +480,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* Hide DIC so that we can trap the unnecessary maintenance...*/ + val &= ~BIT(CTR_DIC_SHIFT); + + /* ... and fake IminLine to reduce the number of traps. */ + val &= ~CTR_IMINLINE_MASK; + val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + } + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 803b24d2464a..61d63657ab59 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -142,6 +142,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); idmap_pg_dir = .; . += IDMAP_DIR_SIZE; + idmap_pg_end = .; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; @@ -171,9 +172,6 @@ SECTIONS *(.altinstructions) __alt_instructions_end = .; } - .altinstr_replacement : { - *(.altinstr_replacement) - } . = ALIGN(PAGE_SIZE); __inittext_end = .; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 43487f035385..7a7e425616b5 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; + /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 6e3c9c8b2df9..525010504f9d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - /* Clean guest FP state to memory and invalidate cpu view */ - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfd626447482..5271ab366bee 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -202,6 +202,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + + if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { + int i; + + for (i = 0; i < 16; i++) + *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + } out: return err; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 706cca23f0d2..1249f68a9418 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -162,31 +162,16 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - /* * Handle the guest trying to use a ptrauth instruction, or trying to access a * ptrauth register. */ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *ctxt; - - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_enable(vcpu); - ctxt = vcpu->arch.host_cpu_context; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - } else { + else kvm_inject_undefined(vcpu); - } } /* diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 160be2b4696d..dc41b505507d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -136,11 +136,15 @@ ENTRY(__kvm_handle_stub_hvc) 1: cmp x0, #HVC_RESET_VECTORS b.ne 1f -reset: + /* - * Reset kvm back to the hyp stub. Do not clobber x0-x4 in - * case we coming via HVC_SOFT_RESTART. + * Set the HVC_RESET_VECTORS return code before entering the common + * path so that we do not clobber x0-x2 in case we are coming via + * HVC_SOFT_RESTART. */ + mov x0, xzr +reset: + /* Reset kvm back to the hyp stub. */ mrs x5, sctlr_el2 ldr x6, =SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc @@ -151,7 +155,6 @@ reset: /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 - mov x0, xzr eret 1: /* Bad stub call */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 58f281b6ca4a..9c7912923b74 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -28,7 +28,15 @@ /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { - if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a9d25a305af5..a364a4ad5479 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include <asm/kvm_emulate.h> #include <asm/esr.h> -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f4a8ae918827..784d485218ca 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -258,7 +258,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { const struct kvm_regs *cpu_reset; - int ret = -EINVAL; + int ret; bool loaded; /* Reset PMU outside of the non-preemptible section */ @@ -281,15 +281,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { - if (kvm_vcpu_enable_ptrauth(vcpu)) + if (kvm_vcpu_enable_ptrauth(vcpu)) { + ret = -EINVAL; goto out; + } } switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpu_has_32bit_el1()) + if (!cpu_has_32bit_el1()) { + ret = -EINVAL; goto out; + } cpu_reset = &default_regs_reset32; } else { cpu_reset = &default_regs_reset; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5b7085ca213d..6c888d5431b5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1276,10 +1276,16 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + int reg = r->reg; + + /* See the 32bit mapping in kvm_host.h */ + if (p->is_aarch32) + reg = r->reg / 2; + if (p->is_write) - vcpu_write_sys_reg(vcpu, p->regval, r->reg); + vcpu_write_sys_reg(vcpu, p->regval, reg); else - p->regval = vcpu_read_sys_reg(vcpu, r->reg); + p->regval = vcpu_read_sys_reg(vcpu, reg); return true; } @@ -2356,8 +2362,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) return NULL; + if (!index_to_params(id, ¶ms)) + return NULL; + table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg_by_id(id, ¶ms, table, num); + r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 10415572e82f..322b55664cca 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -48,5 +48,6 @@ EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" .align 2 9: mov x0, x2 // return the original size + uaccess_disable_not_uao x2, x3 ret .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 680e74409ff9..8472dc7798b3 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -66,5 +66,6 @@ EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 0bedae3f3792..8e0355c1e318 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -68,5 +68,6 @@ EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 2d88c736e8f2..6085214654dc 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,5 +65,6 @@ EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b5e329fde2dd..aa3e43e87dab 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -24,6 +24,13 @@ static unsigned long *asid_map; static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); + +#ifdef CONFIG_MRVL_OCTEONTX_EL0_INTR +#define LOCKED_ASIDS_COUNT 128 + +static u64 locked_asids[LOCKED_ASIDS_COUNT]; +#endif + static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) @@ -100,6 +107,14 @@ static void flush_context(void) per_cpu(reserved_asids, i) = asid; } +#ifdef CONFIG_MRVL_OCTEONTX_EL0_INTR + /* Set bits for locked ASIDs. */ + for (i = 0; i < LOCKED_ASIDS_COUNT; i++) { + asid = locked_asids[i]; + if (asid != 0) + __set_bit(asid & ~ASID_MASK, asid_map); + } +#endif /* * Queue a TLB invalidation for each CPU to perform on next * context-switch @@ -107,9 +122,61 @@ static void flush_context(void) cpumask_setall(&tlb_flush_pending); } +#ifdef CONFIG_MRVL_OCTEONTX_EL0_INTR +int lock_context(struct mm_struct *mm, int index) +{ + unsigned long flags; + u64 asid; + + if ((index < 0) || (index >= LOCKED_ASIDS_COUNT)) + return -1; + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + asid = atomic64_read(&mm->context.id); + locked_asids[index] = asid; + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + return 0; +} +EXPORT_SYMBOL(lock_context); + +int unlock_context_by_index(int index) +{ + unsigned long flags; + + if ((index < 0) || (index >= LOCKED_ASIDS_COUNT)) + return -1; + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + locked_asids[index] = 0; + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + return 0; +} +EXPORT_SYMBOL(unlock_context_by_index); + +bool unlock_context_by_mm(struct mm_struct *mm) +{ + int i; + unsigned long flags; + bool hit = false; + u64 asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + asid = atomic64_read(&mm->context.id); + + for (i = 0; i < LOCKED_ASIDS_COUNT; i++) { + if (locked_asids[i] == asid) { + hit = true; + locked_asids[i] = 0; + } + } + + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + return hit; +} +#endif + static bool check_update_reserved_asid(u64 asid, u64 newasid) { - int cpu; + int i, cpu; bool hit = false; /* @@ -128,6 +195,16 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) } } +#ifdef CONFIG_MRVL_OCTEONTX_EL0_INTR + /* Same mechanism for locked ASIDs */ + for (i = 0; i < LOCKED_ASIDS_COUNT; i++) { + if (locked_asids[i] == asid) { + hit = true; + locked_asids[i] = newasid; + } + } +#endif + return hit; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 414b8e0f19e0..b071c6cdf999 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -432,7 +432,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, esr)) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f475e54fbc43..946414cd9ca9 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = (pte_t *)pudp; } else if (sz == (PAGE_SIZE * CONT_PTES)) { pmdp = pmd_alloc(mm, pudp, addr); + if (!pmdp) + return NULL; WARN_ON(addr & (sz - 1)); /* diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index cf798a1628cf..820438c14f34 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -32,6 +32,7 @@ config CSKY select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_AUDITSYSCALL + select HAVE_COPY_THREAD_TLS select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -70,7 +71,7 @@ config CPU_HAS_TLBI config CPU_HAS_LDSTEX bool help - For SMP, CPU needs "ldex&stex" instrcutions to atomic operations. + For SMP, CPU needs "ldex&stex" instructions for atomic operations. config CPU_NEED_TLBSYNC bool diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index 7ab78bd0f3b1..61d94ec7dd16 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -16,14 +16,16 @@ #define LSAVE_A4 40 #define LSAVE_A5 44 +#define usp ss1 + .macro USPTOKSP - mtcr sp, ss1 + mtcr sp, usp mfcr sp, ss0 .endm .macro KSPTOUSP mtcr sp, ss0 - mfcr sp, ss1 + mfcr sp, usp .endm .macro SAVE_ALL epc_inc @@ -45,7 +47,13 @@ add lr, r13 stw lr, (sp, 8) + mov lr, sp + addi lr, 32 + addi lr, 32 + addi lr, 16 + bt 2f mfcr lr, ss1 +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -79,9 +87,10 @@ ldw a0, (sp, 12) mtcr a0, epsr btsti a0, 31 + bt 1f ldw a0, (sp, 16) mtcr a0, ss1 - +1: ldw a0, (sp, 24) ldw a1, (sp, 28) ldw a2, (sp, 32) @@ -102,9 +111,9 @@ addi sp, 32 addi sp, 8 - bt 1f + bt 2f KSPTOUSP -1: +2: rte .endm @@ -158,15 +167,12 @@ * BA Reserved C D V */ cprcr r6, cpcr30 - lsri r6, 28 - lsli r6, 28 + lsri r6, 29 + lsli r6, 29 addi r6, 0xe cpwcr r6, cpcr30 - lsri r6, 28 - addi r6, 2 - lsli r6, 28 - addi r6, 0xe + movi r6, 0 cpwcr r6, cpcr31 .endm diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c index e7e11344005a..ff92bf4af7df 100644 --- a/arch/csky/abiv2/fpu.c +++ b/arch/csky/abiv2/fpu.c @@ -10,11 +10,6 @@ #define MTCR_DIST 0xC0006420 #define MFCR_DIST 0xC0006020 -void __init init_fpu(void) -{ - mtcr("cr<1, 2>", 0); -} - /* * fpu_libc_helper() is to help libc to excute: * - mfcr %a, cr<1, 2> diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index 9897a16b45e5..ac8f65a3e75a 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -13,6 +13,8 @@ #define LSAVE_A1 28 #define LSAVE_A2 32 #define LSAVE_A3 36 +#define LSAVE_A4 40 +#define LSAVE_A5 44 #define KSPTOUSP #define USPTOKSP @@ -31,7 +33,13 @@ mfcr lr, epsr stw lr, (sp, 12) + btsti lr, 31 + bf 1f + addi lr, sp, 152 + br 2f +1: mfcr lr, usp +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -64,8 +72,10 @@ mtcr a0, epc ldw a0, (sp, 12) mtcr a0, epsr + btsti a0, 31 ldw a0, (sp, 16) mtcr a0, usp + mtcr a0, ss0 #ifdef CONFIG_CPU_HAS_HILO ldw a0, (sp, 140) @@ -86,6 +96,9 @@ addi sp, 40 ldm r16-r30, (sp) addi sp, 72 + bf 1f + mfcr sp, ss0 +1: rte .endm @@ -214,16 +227,13 @@ */ mfcr r6, cr<30, 15> /* Get MSA0 */ 2: - lsri r6, 28 - lsli r6, 28 + lsri r6, 29 + lsli r6, 29 addi r6, 0x1ce mtcr r6, cr<30, 15> /* Set MSA0 */ - lsri r6, 28 - addi r6, 2 - lsli r6, 28 - addi r6, 0x1ce - mtcr r6, cr<31, 15> /* Set MSA1 */ + movi r6, 0 + mtcr r6, cr<31, 15> /* Clr MSA1 */ /* enable MMU */ mfcr r6, cr18 diff --git a/arch/csky/abiv2/inc/abi/fpu.h b/arch/csky/abiv2/inc/abi/fpu.h index 22ca3cf2794a..09e2700a3693 100644 --- a/arch/csky/abiv2/inc/abi/fpu.h +++ b/arch/csky/abiv2/inc/abi/fpu.h @@ -9,7 +9,8 @@ int fpu_libc_helper(struct pt_regs *regs); void fpu_fpe(struct pt_regs *regs); -void __init init_fpu(void); + +static inline void init_fpu(void) { mtcr("cr<1, 2>", 0); } void save_to_user_fp(struct user_fp *user_fp); void restore_from_user_fp(struct user_fp *user_fp); diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 21e0bd5293dd..c6bcd7f7c720 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -43,6 +43,7 @@ extern struct cpuinfo_csky cpu_data[]; struct thread_struct { unsigned long ksp; /* kernel stack pointer */ unsigned long sr; /* saved status register */ + unsigned long trap_no; /* saved status register */ /* FPU regs */ struct user_fp __aligned(16) user_fp; diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h index eaa1c3403a42..60f8a4112588 100644 --- a/arch/csky/include/asm/uaccess.h +++ b/arch/csky/include/asm/uaccess.h @@ -254,7 +254,7 @@ do { \ extern int __get_user_bad(void); -#define __copy_user(to, from, n) \ +#define ___copy_to_user(to, from, n) \ do { \ int w0, w1, w2, w3; \ asm volatile( \ @@ -289,31 +289,34 @@ do { \ " subi %0, 4 \n" \ " br 3b \n" \ "5: cmpnei %0, 0 \n" /* 1B */ \ - " bf 8f \n" \ + " bf 13f \n" \ " ldb %3, (%2, 0) \n" \ "6: stb %3, (%1, 0) \n" \ " addi %2, 1 \n" \ " addi %1, 1 \n" \ " subi %0, 1 \n" \ " br 5b \n" \ - "7: br 8f \n" \ + "7: subi %0, 4 \n" \ + "8: subi %0, 4 \n" \ + "12: subi %0, 4 \n" \ + " br 13f \n" \ ".section __ex_table, \"a\" \n" \ ".align 2 \n" \ - ".long 2b, 7b \n" \ - ".long 9b, 7b \n" \ - ".long 10b, 7b \n" \ + ".long 2b, 13f \n" \ + ".long 4b, 13f \n" \ + ".long 6b, 13f \n" \ + ".long 9b, 12b \n" \ + ".long 10b, 8b \n" \ ".long 11b, 7b \n" \ - ".long 4b, 7b \n" \ - ".long 6b, 7b \n" \ ".previous \n" \ - "8: \n" \ + "13: \n" \ : "=r"(n), "=r"(to), "=r"(from), "=r"(w0), \ "=r"(w1), "=r"(w2), "=r"(w3) \ : "0"(n), "1"(to), "2"(from) \ : "memory"); \ } while (0) -#define __copy_user_zeroing(to, from, n) \ +#define ___copy_from_user(to, from, n) \ do { \ int tmp; \ int nsave; \ @@ -356,22 +359,22 @@ do { \ " addi %1, 1 \n" \ " subi %0, 1 \n" \ " br 5b \n" \ - "8: mov %3, %0 \n" \ - " movi %4, 0 \n" \ - "9: stb %4, (%1, 0) \n" \ - " addi %1, 1 \n" \ - " subi %3, 1 \n" \ - " cmpnei %3, 0 \n" \ - " bt 9b \n" \ - " br 7f \n" \ + "8: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ + " bf 7f \n" \ + "9: subi %0, 8 \n" \ + " bf 7f \n" \ + "13: stw %3, (%1, 8) \n" \ + " subi %0, 12 \n" \ + " bf 7f \n" \ ".section __ex_table, \"a\" \n" \ ".align 2 \n" \ - ".long 2b, 8b \n" \ + ".long 2b, 7f \n" \ + ".long 4b, 7f \n" \ + ".long 6b, 7f \n" \ ".long 10b, 8b \n" \ - ".long 11b, 8b \n" \ - ".long 12b, 8b \n" \ - ".long 4b, 8b \n" \ - ".long 6b, 8b \n" \ + ".long 11b, 9b \n" \ + ".long 12b,13b \n" \ ".previous \n" \ "7: \n" \ : "=r"(n), "=r"(to), "=r"(from), "=r"(nsave), \ diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index ec60e49cea66..78c2a35e4c8a 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -1,7 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS #include <asm-generic/unistd.h> diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S index 5b84f11485ae..3821ef9b7567 100644 --- a/arch/csky/kernel/atomic.S +++ b/arch/csky/kernel/atomic.S @@ -17,10 +17,12 @@ ENTRY(csky_cmpxchg) mfcr a3, epc addi a3, TRAP0_SIZE - subi sp, 8 + subi sp, 16 stw a3, (sp, 0) mfcr a3, epsr stw a3, (sp, 4) + mfcr a3, usp + stw a3, (sp, 8) psrset ee #ifdef CONFIG_CPU_HAS_LDSTEX @@ -47,7 +49,9 @@ ENTRY(csky_cmpxchg) mtcr a3, epc ldw a3, (sp, 4) mtcr a3, epsr - addi sp, 8 + ldw a3, (sp, 8) + mtcr a3, usp + addi sp, 16 KSPTOUSP rte END(csky_cmpxchg) diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7e84ccccbd8..e18c2f02b6f3 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -162,8 +162,10 @@ csky_syscall_trace: ldw a3, (sp, LSAVE_A3) #if defined(__CSKYABIV2__) subi sp, 8 - stw r5, (sp, 0x4) - stw r4, (sp, 0x0) + ldw r9, (sp, LSAVE_A4) + stw r9, (sp, 0x0) + ldw r9, (sp, LSAVE_A5) + stw r9, (sp, 0x4) #else ldw r6, (sp, LSAVE_A4) ldw r7, (sp, LSAVE_A5) @@ -312,8 +314,6 @@ ENTRY(__switch_to) mfcr a2, psr /* Save PSR value */ stw a2, (a3, THREAD_SR) /* Save PSR in task struct */ - bclri a2, 6 /* Disable interrupts */ - mtcr a2, psr SAVE_SWITCH_STACK diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S index 61989f9241c0..17ed9d250480 100644 --- a/arch/csky/kernel/head.S +++ b/arch/csky/kernel/head.S @@ -21,6 +21,11 @@ END(_start) ENTRY(_start_smp_secondary) SETUP_MMU + /* copy msa1 from CPU0 */ + lrw r6, secondary_msa1 + ld.w r6, (r6, 0) + mtcr r6, cr<31, 15> + /* set stack point */ lrw r6, secondary_stack ld.w r6, (r6, 0) diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index e68ff375c8f8..ab55e98ee8f6 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -12,12 +12,17 @@ struct stackframe { static int unwind_frame_kernel(struct stackframe *frame) { - if (kstack_end((void *)frame->fp)) + unsigned long low = (unsigned long)task_stack_page(current); + unsigned long high = low + THREAD_SIZE; + + if (unlikely(frame->fp < low || frame->fp > high)) return -EPERM; - if (frame->fp & 0x3 || frame->fp < TASK_SIZE) + + if (kstack_end((void *)frame->fp) || frame->fp & 0x3) return -EPERM; *frame = *(struct stackframe *)frame->fp; + if (__kernel_text_address(frame->lr)) { int graph = 0; diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index e555740c0be5..1bde082990ef 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -34,10 +34,11 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sw->r15; } -int copy_thread(unsigned long clone_flags, +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) + struct task_struct *p, + unsigned long tls) { struct switch_stack *childstack; struct pt_regs *childregs = task_pt_regs(p); @@ -64,7 +65,7 @@ int copy_thread(unsigned long clone_flags, childregs->usp = usp; if (clone_flags & CLONE_SETTLS) task_thread_info(p)->tp_value = childregs->tls - = childregs->regs[0]; + = tls; childregs->a0 = 0; childstack->r15 = (unsigned long) ret_from_fork; diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 23ee604aafdb..2c1e253abb74 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -24,26 +24,9 @@ struct screen_info screen_info = { }; #endif -phys_addr_t __init_memblock memblock_end_of_REG0(void) -{ - return (memblock.memory.regions[0].base + - memblock.memory.regions[0].size); -} - -phys_addr_t __init_memblock memblock_start_of_REG1(void) -{ - return memblock.memory.regions[1].base; -} - -size_t __init_memblock memblock_size_of_REG1(void) -{ - return memblock.memory.regions[1].size; -} - static void __init csky_memblock_init(void) { unsigned long zone_size[MAX_NR_ZONES]; - unsigned long zhole_size[MAX_NR_ZONES]; signed long size; memblock_reserve(__pa(_stext), _end - _stext); @@ -57,54 +40,36 @@ static void __init csky_memblock_init(void) memblock_dump_all(); memset(zone_size, 0, sizeof(zone_size)); - memset(zhole_size, 0, sizeof(zhole_size)); min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); - - max_low_pfn = PFN_UP(memblock_end_of_REG0()); - if (max_low_pfn == 0) - max_low_pfn = max_pfn; + max_low_pfn = max_pfn = PFN_DOWN(memblock_end_of_DRAM()); size = max_pfn - min_low_pfn; - if (memblock.memory.cnt > 1) { - zone_size[ZONE_NORMAL] = - PFN_DOWN(memblock_start_of_REG1()) - min_low_pfn; - zhole_size[ZONE_NORMAL] = - PFN_DOWN(memblock_start_of_REG1()) - max_low_pfn; + if (size <= PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET)) + zone_size[ZONE_NORMAL] = size; + else if (size < PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) { + zone_size[ZONE_NORMAL] = + PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET); + max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; } else { - if (size <= PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) - zone_size[ZONE_NORMAL] = max_pfn - min_low_pfn; - else { - zone_size[ZONE_NORMAL] = + zone_size[ZONE_NORMAL] = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; - } + max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; + write_mmu_msa1(read_mmu_msa0() + SSEG_SIZE); } #ifdef CONFIG_HIGHMEM - size = 0; - if (memblock.memory.cnt > 1) { - size = PFN_DOWN(memblock_size_of_REG1()); - highstart_pfn = PFN_DOWN(memblock_start_of_REG1()); - } else { - size = max_pfn - min_low_pfn - - PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - highstart_pfn = min_low_pfn + - PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - } - - if (size > 0) - zone_size[ZONE_HIGHMEM] = size; + zone_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; - highend_pfn = max_pfn; + highstart_pfn = max_low_pfn; + highend_pfn = max_pfn; #endif memblock_set_current_limit(PFN_PHYS(max_low_pfn)); dma_contiguous_reserve(0); - free_area_init_node(0, zone_size, min_low_pfn, zhole_size); + free_area_init_node(0, zone_size, min_low_pfn, NULL); } void __init setup_arch(char **cmdline_p) diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b07a534b3062..609123c975c1 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -22,6 +22,9 @@ #include <asm/sections.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> +#ifdef CONFIG_CPU_HAS_FPU +#include <abi/fpu.h> +#endif struct ipi_data_struct { unsigned long bits ____cacheline_aligned; @@ -120,7 +123,7 @@ void __init setup_smp_ipi(void) int rc; if (ipi_irq == 0) - panic("%s IRQ mapping failed\n", __func__); + return; rc = request_percpu_irq(ipi_irq, handle_ipi, "IPI Interrupt", &ipi_dummy_dev); @@ -156,6 +159,8 @@ volatile unsigned int secondary_hint; volatile unsigned int secondary_ccr; volatile unsigned int secondary_stack; +unsigned long secondary_msa1; + int __cpu_up(unsigned int cpu, struct task_struct *tidle) { unsigned long mask = 1 << cpu; @@ -164,6 +169,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) (unsigned int) task_stack_page(tidle) + THREAD_SIZE - 8; secondary_hint = mfcr("cr31"); secondary_ccr = mfcr("cr18"); + secondary_msa1 = read_mmu_msa1(); /* * Because other CPUs are in reset status, we must flush data diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index f487a9b996ae..de903b18cd2a 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -115,8 +115,9 @@ asmlinkage void trap_c(struct pt_regs *regs) int sig; unsigned long vector; siginfo_t info; + struct task_struct *tsk = current; - vector = (mfcr("psr") >> 16) & 0xff; + vector = (regs->sr >> 16) & 0xff; switch (vector) { case VEC_ZERODIV: @@ -128,6 +129,7 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGTRAP; break; case VEC_ILLEGAL: + tsk->thread.trap_no = vector; #ifndef CONFIG_CPU_NO_USER_BKPT if (*(uint16_t *)instruction_pointer(regs) != USR_BKPT) #endif @@ -143,15 +145,19 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGTRAP; break; case VEC_ACCESS: + tsk->thread.trap_no = vector; return buserr(regs); #ifdef CONFIG_CPU_NEED_SOFTALIGN case VEC_ALIGN: + tsk->thread.trap_no = vector; return csky_alignment(regs); #endif #ifdef CONFIG_CPU_HAS_FPU case VEC_FPE: + tsk->thread.trap_no = vector; return fpu_fpe(regs); case VEC_PRIV: + tsk->thread.trap_no = vector; if (fpu_libc_helper(regs)) return; #endif @@ -159,5 +165,8 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGSEGV; break; } + + tsk->thread.trap_no = vector; + send_sig(sig, current, 0); } diff --git a/arch/csky/lib/usercopy.c b/arch/csky/lib/usercopy.c index 647a23986fb5..3c9bd645e643 100644 --- a/arch/csky/lib/usercopy.c +++ b/arch/csky/lib/usercopy.c @@ -7,10 +7,7 @@ unsigned long raw_copy_from_user(void *to, const void *from, unsigned long n) { - if (access_ok(from, n)) - __copy_user_zeroing(to, from, n); - else - memset(to, 0, n); + ___copy_from_user(to, from, n); return n; } EXPORT_SYMBOL(raw_copy_from_user); @@ -18,8 +15,7 @@ EXPORT_SYMBOL(raw_copy_from_user); unsigned long raw_copy_to_user(void *to, const void *from, unsigned long n) { - if (access_ok(to, n)) - __copy_user(to, from, n); + ___copy_to_user(to, from, n); return n; } EXPORT_SYMBOL(raw_copy_to_user); diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index 4eebebdcd1bf..1e5fb600216a 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only ifeq ($(CONFIG_CPU_HAS_CACHEV2),y) obj-y += cachev2.o +CFLAGS_REMOVE_cachev2.o = $(CC_FLAGS_FTRACE) else obj-y += cachev1.o +CFLAGS_REMOVE_cachev1.o = $(CC_FLAGS_FTRACE) endif obj-y += dma-mapping.o diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index 18041f46ded1..01290c551071 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -179,11 +179,14 @@ bad_area: bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); return; } no_context: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; @@ -198,6 +201,8 @@ no_context: die_if_kernel("Oops", regs, write); out_of_memory: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). @@ -206,6 +211,8 @@ out_of_memory: return; do_sigbus: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + up_read(&mm->mmap_sem); /* Kernel mode? Handle exceptions or die */ diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index 66e597053488..a0b9b8210af1 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 12cd9231c4b8..0231d69c8bf2 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%2);\n" \ " %1 = "#op "(%0,%3);\n" \ " memw_locked(%2,P3)=%1;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output), "=&r" (val) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) " }" " memw_locked(%2, p3) = %1;" " {" - " if !p3 jump 1b;" + " if (!p3) jump 1b;" " }" "2:" : "=&r" (__oldval), "=&r" (tmp) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 47384b094b94..71429f756af0 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -223,7 +223,7 @@ static inline int ffs(int x) int r; asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n" - "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n" + "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n" : "=&r" (r) : "r" (x) : "p0"); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 6091322c3af9..92b8a02e588a 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __asm__ __volatile__ ( "1: %0 = memw_locked(%1);\n" /* load into retval */ " memw_locked(%1,P0) = %2;\n" /* store into memory */ - " if !P0 jump 1b;\n" + " if (!P0) jump 1b;\n" : "=&r" (retval) : "r" (ptr), "r" (x) : "memory", "p0" diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index cb635216a732..0191f7c7193e 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -16,7 +16,7 @@ /* For example: %1 = %4 */ \ insn \ "2: memw_locked(%3,p2) = %1;\n" \ - " if !p2 jump 1b;\n" \ + " if (!p2) jump 1b;\n" \ " %1 = #0;\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ @@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "1: %1 = memw_locked(%3)\n" " {\n" " p2 = cmp.eq(%1,%4)\n" - " if !p2.new jump:NT 3f\n" + " if (!p2.new) jump:NT 3f\n" " }\n" "2: memw_locked(%3,p2) = %5\n" - " if !p2 jump 1b\n" + " if (!p2) jump 1b\n" "3:\n" ".section .fixup,\"ax\"\n" "4: %0 = #%6\n" diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index ba1a444d55b3..68a68147504d 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -171,16 +171,10 @@ static inline void writel(u32 data, volatile void __iomem *addr) #define writew_relaxed __raw_writew #define writel_relaxed __raw_writel -/* - * Need an mtype somewhere in here, for cache type deals? - * This is probably too long for an inline. - */ -void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size); +void __iomem *ioremap(unsigned long phys_addr, unsigned long size); +#define ioremap_nocache ioremap +#define ioremap_uc(X, Y) ioremap((X), (Y)) -static inline void __iomem *ioremap(unsigned long phys_addr, unsigned long size) -{ - return ioremap_nocache(phys_addr, size); -} static inline void iounmap(volatile void __iomem *addr) { diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index bfe07d842ff3..ef103b73bec8 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) "1: R6 = memw_locked(%0);\n" " R6 = add(R6,#-1);\n" " memw_locked(%0,P3) = R6\n" - " if !P3 jump 1b;\n" + " if (!P3) jump 1b;\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " { %0 = P3 }\n" "1:\n" @@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0)\n" " { P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1)\n" " { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" @@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1b; R6 = #1; }\n" + " { if (!P3) jump 1b; R6 = #1; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n" + " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c index cf8974beb500..b3dbb472572e 100644 --- a/arch/hexagon/kernel/hexagon_ksyms.c +++ b/arch/hexagon/kernel/hexagon_ksyms.c @@ -20,7 +20,7 @@ EXPORT_SYMBOL(__vmgetie); EXPORT_SYMBOL(__vmsetie); EXPORT_SYMBOL(__vmyield); EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(ioremap_nocache); +EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index 35f29423fda8..5ed02f699479 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -11,8 +11,6 @@ #include <linux/thread_info.h> #include <linux/module.h> -register unsigned long current_frame_pointer asm("r30"); - struct stackframe { unsigned long fp; unsigned long rets; @@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace) low = (unsigned long)task_stack_page(current); high = low + THREAD_SIZE; - fp = current_frame_pointer; + fp = (unsigned long)__builtin_frame_address(0); while (fp >= low && fp <= (high - sizeof(*frame))) { frame = (struct stackframe *)fp; diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S index 12242c27e2df..4023fdbea490 100644 --- a/arch/hexagon/kernel/vm_entry.S +++ b/arch/hexagon/kernel/vm_entry.S @@ -369,7 +369,7 @@ ret_from_fork: R26.L = #LO(do_work_pending); R0 = #VM_INT_DISABLE; } - if P0 jump check_work_pending + if (P0) jump check_work_pending { R0 = R25; callr R24 diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c index 77d8e1e69e9b..b103d83b5fbb 100644 --- a/arch/hexagon/mm/ioremap.c +++ b/arch/hexagon/mm/ioremap.c @@ -9,7 +9,7 @@ #include <linux/vmalloc.h> #include <linux/mm.h> -void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap(unsigned long phys_addr, unsigned long size) { unsigned long last_addr, addr; unsigned long offset = phys_addr & ~PAGE_MASK; diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 62b0eb6cf69a..84eab0f5e00a 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -216,8 +216,10 @@ static int __init mcf_pci_init(void) /* Keep a virtual mapping to IO/config space active */ iospace = (unsigned long) ioremap(PCI_IO_PA, PCI_IO_SIZE); - if (iospace == 0) + if (iospace == 0) { + pci_free_host_bridge(bridge); return -ENODEV; + } pr_info("Coldfire: PCI IO/config window mapped to 0x%x\n", (u32) iospace); diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h index de1470c4d829..1149251ea58d 100644 --- a/arch/m68k/include/asm/mac_via.h +++ b/arch/m68k/include/asm/mac_via.h @@ -257,6 +257,7 @@ extern int rbv_present,via_alt_mapping; struct irq_desc; +extern void via_l2_flush(int writeback); extern void via_register_interrupts(void); extern void via_irq_enable(int); extern void via_irq_disable(int); diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 3c5def10d486..caa260f877f2 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -139,7 +139,8 @@ void __init setup_arch(char **cmdline_p) pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ", __bss_stop, memory_start, memory_start, memory_end); - memblock_add(memory_start, memory_end - memory_start); + memblock_add(_rambase, memory_end - _rambase); + memblock_reserve(_rambase, memory_start - _rambase); /* Keep a copy of command line */ *cmdline_p = &command_line[0]; diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 11be08f4f750..b3d2dbb27231 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -59,7 +59,6 @@ extern void iop_preinit(void); extern void iop_init(void); extern void via_init(void); extern void via_init_clock(irq_handler_t func); -extern void via_flush_cache(void); extern void oss_init(void); extern void psc_init(void); extern void baboon_init(void); @@ -130,21 +129,6 @@ int __init mac_parse_bootinfo(const struct bi_record *record) return unknown; } -/* - * Flip into 24bit mode for an instant - flushes the L2 cache card. We - * have to disable interrupts for this. Our IRQ handlers will crap - * themselves if they take an IRQ in 24bit mode! - */ - -static void mac_cache_card_flush(int writeback) -{ - unsigned long flags; - - local_irq_save(flags); - via_flush_cache(); - local_irq_restore(flags); -} - void __init config_mac(void) { if (!MACH_IS_MAC) @@ -175,9 +159,8 @@ void __init config_mac(void) * not. */ - if (macintosh_config->ident == MAC_MODEL_IICI - || macintosh_config->ident == MAC_MODEL_IIFX) - mach_l2_flush = mac_cache_card_flush; + if (macintosh_config->ident == MAC_MODEL_IICI) + mach_l2_flush = via_l2_flush; } diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 9bfa17015768..c432bfafe63e 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -183,7 +183,7 @@ static __inline__ void iop_writeb(volatile struct mac_iop *iop, __u16 addr, __u8 static __inline__ void iop_stop(volatile struct mac_iop *iop) { - iop->status_ctrl &= ~IOP_RUN; + iop->status_ctrl = IOP_AUTOINC; } static __inline__ void iop_start(volatile struct mac_iop *iop) @@ -191,14 +191,9 @@ static __inline__ void iop_start(volatile struct mac_iop *iop) iop->status_ctrl = IOP_RUN | IOP_AUTOINC; } -static __inline__ void iop_bypass(volatile struct mac_iop *iop) -{ - iop->status_ctrl |= IOP_BYPASS; -} - static __inline__ void iop_interrupt(volatile struct mac_iop *iop) { - iop->status_ctrl |= IOP_IRQ; + iop->status_ctrl = IOP_IRQ | IOP_RUN | IOP_AUTOINC; } static int iop_alive(volatile struct mac_iop *iop) @@ -244,7 +239,6 @@ void __init iop_preinit(void) } else { iop_base[IOP_NUM_SCC] = (struct mac_iop *) SCC_IOP_BASE_QUADRA; } - iop_base[IOP_NUM_SCC]->status_ctrl = 0x87; iop_scc_present = 1; } else { iop_base[IOP_NUM_SCC] = NULL; @@ -256,7 +250,7 @@ void __init iop_preinit(void) } else { iop_base[IOP_NUM_ISM] = (struct mac_iop *) ISM_IOP_BASE_QUADRA; } - iop_base[IOP_NUM_ISM]->status_ctrl = 0; + iop_stop(iop_base[IOP_NUM_ISM]); iop_ism_present = 1; } else { iop_base[IOP_NUM_ISM] = NULL; @@ -416,7 +410,8 @@ static void iop_handle_send(uint iop_num, uint chan) msg->status = IOP_MSGSTATUS_UNUSED; msg = msg->next; iop_send_queue[iop_num][chan] = msg; - if (msg) iop_do_send(msg); + if (msg && iop_readb(iop, IOP_ADDR_SEND_STATE + chan) == IOP_MSG_IDLE) + iop_do_send(msg); } /* @@ -490,16 +485,12 @@ int iop_send_message(uint iop_num, uint chan, void *privdata, if (!(q = iop_send_queue[iop_num][chan])) { iop_send_queue[iop_num][chan] = msg; + iop_do_send(msg); } else { while (q->next) q = q->next; q->next = msg; } - if (iop_readb(iop_base[iop_num], - IOP_ADDR_SEND_STATE + chan) == IOP_MSG_IDLE) { - iop_do_send(msg); - } - return 0; } diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 3c2cfcb74982..1f0fad2a98a0 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -294,10 +294,14 @@ void via_debug_dump(void) * the system into 24-bit mode for an instant. */ -void via_flush_cache(void) +void via_l2_flush(int writeback) { + unsigned long flags; + + local_irq_save(flags); via2[gBufB] &= ~VIA2B_vMode32; via2[gBufB] |= VIA2B_vMode32; + local_irq_restore(flags); } /* diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 6cb1e41d58d0..70a5f55ea664 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -164,7 +164,7 @@ void __init cf_bootmem_alloc(void) m68k_memory[0].addr = _rambase; m68k_memory[0].size = _ramend - _rambase; - memblock_add(m68k_memory[0].addr, m68k_memory[0].size); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); /* compute total pages in system */ num_pages = PFN_DOWN(_ramend - _rambase); diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa69..dcba53803fa5 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 70d3200476bf..53025ec6795f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -43,7 +43,7 @@ config MIPS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT - select HAVE_EBPF_JIT if (!CPU_MICROMIPS) + select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT diff --git a/arch/mips/Makefile b/arch/mips/Makefile index eceff9b75b22..cc7e25d0df57 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -289,12 +289,23 @@ ifdef CONFIG_64BIT endif endif +# When linking a 32-bit executable the LLVM linker cannot cope with a +# 32-bit load address that has been sign-extended to 64 bits. Simply +# remove the upper 32 bits then, as it is safe to do so with other +# linkers. +ifdef CONFIG_64BIT + load-ld = $(load-y) +else + load-ld = $(subst 0xffffffff,0x,$(load-y)) +endif + KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) -KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld) KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + LINKER_LOAD_ADDRESS=$(load-ld) \ VMLINUX_ENTRY_ADDRESS=$(entry-y) \ PLATFORM="$(platform-y)" \ ITS_INPUTS="$(its-y)" diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb20..13e0beb9eee3 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -12,7 +12,7 @@ __archpost: include scripts/Kbuild.include CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 77a836e661c9..df69eaa453a1 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -84,7 +84,7 @@ void __init prom_init(void) * Here we will start up CPU1 in the background and ask it to * reconfigure itself then go back to sleep. */ - memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); + memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20); __sync(); set_c0_cause(C_SW0); cpumask_set_cpu(1, &bmips_booted_mask); diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 3ce4dd578370..c66199e38694 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS targets += vmlinux.its targets += vmlinux.gz.its targets += vmlinux.bz2.its -targets += vmlinux.lzmo.its +targets += vmlinux.lzma.its targets += vmlinux.lzo.its quiet_cmd_cpp_its_S = ITS $@ diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 172801ed35b8..378cbfb31ee7 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o @@ -87,7 +90,7 @@ ifneq ($(zload-y),) VMLINUZ_LOAD_ADDRESS := $(zload-y) else VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ - $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) + $(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS)) endif UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index f97be32bf699..3ad1f76c063a 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2199,6 +2199,9 @@ static int octeon_irq_cib_map(struct irq_domain *d, } cd = kzalloc(sizeof(*cd), GFP_KERNEL); + if (!cd) + return -ENOMEM; + cd->host_data = host_data; cd->bit = hw; diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index 1f730ded5224..5ab29c18aacd 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -517,6 +517,7 @@ static int __init dwc3_octeon_device_init(void) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { + put_device(&pdev->dev); dev_err(&pdev->dev, "No memory resources\n"); return -ENXIO; } @@ -528,8 +529,10 @@ static int __init dwc3_octeon_device_init(void) * know the difference. */ base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) + if (IS_ERR(base)) { + put_device(&pdev->dev); return PTR_ERR(base); + } mutex_lock(&dwc3_octeon_clocks_mutex); dwc3_octeon_clocks_start(&pdev->dev, (u64)base); diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 1322adb705c8..1ef17ca93ac1 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -232,7 +232,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m CONFIG_DRM=y -CONFIG_DRM_RADEON=y +CONFIG_DRM_RADEON=m CONFIG_FB_RADEON=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c index 8772617b64ce..80112f2298b6 100644 --- a/arch/mips/fw/sni/sniprom.c +++ b/arch/mips/fw/sni/sniprom.c @@ -43,7 +43,7 @@ /* O32 stack has to be 8-byte aligned. */ static u64 o32_stk[4096]; -#define O32_STK &o32_stk[sizeof(o32_stk)] +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define __PROM_O32(fun, arg) fun arg __asm__(#fun); \ __asm__(#fun " = call_o32") diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index bf6a8afd7ad2..581a6a3c66e4 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void) #endif } -extern char bmips_reset_nmi_vec; -extern char bmips_reset_nmi_vec_end; -extern char bmips_smp_movevec; -extern char bmips_smp_int_vec; -extern char bmips_smp_int_vec_end; +extern char bmips_reset_nmi_vec[]; +extern char bmips_reset_nmi_vec_end[]; +extern char bmips_smp_movevec[]; +extern char bmips_smp_int_vec[]; +extern char bmips_smp_int_vec_end[]; extern int bmips_smp_enabled; extern int bmips_cpu_offset; diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index c8a47d18f628..2b61052e10c9 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -77,8 +77,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void) extern unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size); -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline +unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -153,8 +153,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size); -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline +unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) { switch (size) { case 1: diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 4e2bea8875f5..92ec31166edd 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -291,10 +291,12 @@ # define cpu_has_mips32r6 __isa_ge_or_flag(6, MIPS_CPU_ISA_M32R6) #endif #ifndef cpu_has_mips64r1 -# define cpu_has_mips64r1 __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1) +# define cpu_has_mips64r1 (cpu_has_64bits && \ + __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1)) #endif #ifndef cpu_has_mips64r2 -# define cpu_has_mips64r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2) +# define cpu_has_mips64r2 (cpu_has_64bits && \ + __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2)) #endif #ifndef cpu_has_mips64r6 # define cpu_has_mips64r6 __isa_ge_and_flag(6, MIPS_CPU_ISA_M64R6) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 29997e42480e..4597017f147b 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -462,7 +462,12 @@ __BUILD_MEMORY_PFX(, bwlq, type, 0) BUILDIO_MEM(b, u8) BUILDIO_MEM(w, u16) BUILDIO_MEM(l, u32) +#ifdef CONFIG_64BIT BUILDIO_MEM(q, u64) +#else +__BUILD_MEMORY_PFX(__raw_, q, u64, 0) +__BUILD_MEMORY_PFX(__mem_, q, u64, 0) +#endif #define __BUILD_IOPORT_PFX(bus, bwlq, type) \ __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0,) \ @@ -488,12 +493,16 @@ __BUILDIO(q, u64) #define readb_relaxed __relaxed_readb #define readw_relaxed __relaxed_readw #define readl_relaxed __relaxed_readl +#ifdef CONFIG_64BIT #define readq_relaxed __relaxed_readq +#endif #define writeb_relaxed __relaxed_writeb #define writew_relaxed __relaxed_writew #define writel_relaxed __relaxed_writel +#ifdef CONFIG_64BIT #define writeq_relaxed __relaxed_writeq +#endif #define readb_be(addr) \ __raw_readb((__force unsigned *)(addr)) @@ -516,8 +525,10 @@ __BUILDIO(q, u64) /* * Some code tests for these symbols */ +#ifdef CONFIG_64BIT #define readq readq #define writeq writeq +#endif #define __BUILD_MEMORY_STRING(bwlq, type) \ \ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 41204a49cf95..7b47a323dc23 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -274,8 +274,12 @@ enum emulation_result { #define MIPS3_PG_SHIFT 6 #define MIPS3_PG_FRAME 0x3fffffc0 +#if defined(CONFIG_64BIT) +#define VPN2_MASK GENMASK(cpu_vmbits - 1, 13) +#else #define VPN2_MASK 0xffffe000 -#define KVM_ENTRYHI_ASID MIPS_ENTRYHI_ASID +#endif +#define KVM_ENTRYHI_ASID cpu_asid_mask(&boot_cpu_data) #define TLB_IS_GLOBAL(x) ((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index bdbdc19a2b8f..3afdb39d092a 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -750,7 +750,7 @@ /* MAAR bit definitions */ #define MIPS_MAAR_VH (_U64CAST_(1) << 63) -#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) +#define MIPS_MAAR_ADDR GENMASK_ULL(55, 12) #define MIPS_MAAR_ADDR_SHIFT 12 #define MIPS_MAAR_S (_ULCAST_(1) << 1) #define MIPS_MAAR_VL (_ULCAST_(1) << 0) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 93a9dce31f25..813dfe5f45a5 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -18,10 +18,12 @@ #include <asm/fixmap.h> #define __ARCH_USE_5LEVEL_HACK -#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) +#if CONFIG_PGTABLE_LEVELS == 2 #include <asm-generic/pgtable-nopmd.h> -#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48)) +#elif CONFIG_PGTABLE_LEVELS == 3 #include <asm-generic/pgtable-nopud.h> +#else +#include <asm-generic/5level-fixup.h> #endif /* @@ -216,6 +218,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) return pgd_val(pgd); } +#define pgd_phys(pgd) virt_to_phys((void *)pgd_val(pgd)) +#define pgd_page(pgd) (pfn_to_page(pgd_phys(pgd) >> PAGE_SHIFT)) + static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 4993db40482c..ee26f9a4575d 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,8 +49,26 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index f777e44653d5..47312c529410 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -50,6 +50,25 @@ static int __init_cache_level(unsigned int cpu) return 0; } +static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + + for_each_possible_cpu(cpu1) + if (cpus_are_siblings(cpu, cpu1)) + cpumask_set_cpu(cpu1, cpu_map); +} + +static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + int cluster = cpu_cluster(&cpu_data[cpu]); + + for_each_possible_cpu(cpu1) + if (cpu_cluster(&cpu_data[cpu1]) == cluster) + cpumask_set_cpu(cpu1, cpu_map); +} + static int __populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -57,14 +76,20 @@ static int __populate_cache_leaves(unsigned int cpu) struct cacheinfo *this_leaf = this_cpu_ci->info_list; if (c->icache.waysize) { + /* L1 caches are per core */ + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA); + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST); } else { populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED); } - if (c->scache.waysize) + if (c->scache.waysize) { + /* L2 cache is per cluster */ + fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map); populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED); + } if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 398b905b027d..370ffc7dc6e3 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -477,20 +477,20 @@ NESTED(nmi_handler, PT_SIZE, sp) .endm .macro __build_clear_fpe + CLI + TRACE_IRQS_OFF .set push /* gas fails to assemble cfc1 for some archs (octeon).*/ \ .set mips1 SET_HARDFLOAT cfc1 a1, fcr31 .set pop - CLI - TRACE_IRQS_OFF .endm .macro __build_clear_msa_fpe - _cfcmsa a1, MSA_CSR CLI TRACE_IRQS_OFF + _cfcmsa a1, MSA_CSR .endm .macro __build_clear_ade diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index e5ea3db23d6b..a9eab83d9148 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -119,9 +119,9 @@ static char *cm2_causes[32] = { "COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07", "0x08", "0x09", "0x0a", "0x0b", "0x0c", "0x0d", "0x0e", "0x0f", - "0x10", "0x11", "0x12", "0x13", - "0x14", "0x15", "0x16", "INTVN_WR_ERR", - "INTVN_RD_ERR", "0x19", "0x1a", "0x1b", + "0x10", "INTVN_WR_ERR", "INTVN_RD_ERR", "0x13", + "0x14", "0x15", "0x16", "0x17", + "0x18", "0x19", "0x1a", "0x1b", "0x1c", "0x1d", "0x1e", "0x1f" }; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index ab349d2381c3..569f46e95c5a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -817,7 +817,17 @@ static void __init arch_mem_init(char **cmdline_p) crashk_res.end - crashk_res.start + 1); #endif device_tree_init(); + + /* + * In order to reduce the possibility of kernel panic when failed to + * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate + * low memory as small as possible before plat_swiotlb_setup(), so + * make sparse_init() using top-down allocation. + */ + memblock_set_bottom_up(false); sparse_init(); + memblock_set_bottom_up(true); + plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 76fae9b79f13..712c15de6ab9 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -464,10 +464,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end) static inline void bmips_nmi_handler_setup(void) { - bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec, - &bmips_reset_nmi_vec_end); - bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec, - &bmips_smp_int_vec_end); + bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec, + bmips_reset_nmi_vec_end); + bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec, + bmips_smp_int_vec_end); } struct reset_vec_info { diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index a3d4bec695c6..6efb2f6889a7 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_pfx_$(basetarget))' \ '$(syshdr_offset_$(basetarget))' -quiet_cmd_sysnr = SYSNR $@ +quiet_cmd_sysnr = SYSNR $@ cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ '$(sysnr_abis_$(basetarget))' \ '$(sysnr_pfx_$(basetarget))' \ diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 37e9413a393d..caa01457dce6 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -18,12 +18,82 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/cpufreq.h> +#include <linux/delay.h> #include <asm/cpu-features.h> #include <asm/cpu-type.h> #include <asm/div64.h> #include <asm/time.h> +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref); +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref_freq); +static unsigned long glb_lpj_ref; +static unsigned long glb_lpj_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpumask *cpus = freq->policy->cpus; + unsigned long lpj; + int cpu; + + /* + * Skip lpj numbers adjustment if the CPU-freq transition is safe for + * the loops delay. (Is this possible?) + */ + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + /* Save the initial values of the lpjes for future scaling. */ + if (!glb_lpj_ref) { + glb_lpj_ref = boot_cpu_data.udelay_val; + glb_lpj_ref_freq = freq->old; + + for_each_online_cpu(cpu) { + per_cpu(pcp_lpj_ref, cpu) = + cpu_data[cpu].udelay_val; + per_cpu(pcp_lpj_ref_freq, cpu) = freq->old; + } + } + + /* + * Adjust global lpj variable and per-CPU udelay_val number in + * accordance with the new CPU frequency. + */ + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + loops_per_jiffy = cpufreq_scale(glb_lpj_ref, + glb_lpj_ref_freq, + freq->new); + + for_each_cpu(cpu, cpus) { + lpj = cpufreq_scale(per_cpu(pcp_lpj_ref, cpu), + per_cpu(pcp_lpj_ref_freq, cpu), + freq->new); + cpu_data[cpu].udelay_val = (unsigned int)lpj; + } + } + + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif /* CONFIG_CPU_FREQ */ + /* * forward reference */ diff --git a/arch/mips/kernel/topology.c b/arch/mips/kernel/topology.c index cd3e1f82e1a5..08ad6371fbe0 100644 --- a/arch/mips/kernel/topology.c +++ b/arch/mips/kernel/topology.c @@ -20,7 +20,7 @@ static int __init topology_init(void) for_each_present_cpu(i) { struct cpu *c = &per_cpu(cpu_devices, i); - c->hotpluggable = 1; + c->hotpluggable = !!i; ret = register_cpu(c, i); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c52766a5b85f..ec09e594ac8a 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2126,6 +2126,7 @@ static void configure_status(void) change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX, status_set); + back_to_back_c0_hazard(); } unsigned int hwrena; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 33ee0d18fb0a..eb9d7af93836 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -50,7 +50,7 @@ SECTIONS /* . = 0xa800000000300000; */ . = 0xffffffff80300000; #endif - . = VMLINUX_LOAD_ADDRESS; + . = LINKER_LOAD_ADDRESS; /* read-only */ _text = .; /* Text and read-only data */ .text : { diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6176b9acba95..d0d832ab3d3b 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index b4323b2214e2..49a79cd67c0f 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -514,8 +514,8 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1e10b308.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP | PMU_PPE_TC); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); - clkdev_add_pmu("1e108000.gswip", "gphy0", 0, 0, PMU_GPHY); - clkdev_add_pmu("1e108000.gswip", "gphy1", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "afe", 1, 2, PMU_ANALOG_DSL_AFE); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); @@ -538,8 +538,8 @@ void __init ltq_soc_init(void) PMU_SWITCH | PMU_PPE_DPLUS | PMU_PPE_DPLUM | PMU_PPE_EMA | PMU_PPE_TC | PMU_PPE_SLL01 | PMU_PPE_QSB | PMU_PPE_TOP); - clkdev_add_pmu("1e108000.gswip", "gphy0", 0, 0, PMU_GPHY); - clkdev_add_pmu("1e108000.gswip", "gphy1", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 13f3404f0030..9674ae1361a8 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c @@ -27,6 +27,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 3faf80c894ec..f6ab5825c03c 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1483,6 +1483,7 @@ static void build_r4000_tlb_refill_handler(void) static void setup_pw(void) { + unsigned int pwctl; unsigned long pgd_i, pgd_w; #ifndef __PAGETABLE_PMD_FOLDED unsigned long pmd_i, pmd_w; @@ -1509,6 +1510,7 @@ static void setup_pw(void) pte_i = ilog2(_PAGE_GLOBAL); pte_w = 0; + pwctl = 1 << 30; /* Set PWDirExt */ #ifndef __PAGETABLE_PMD_FOLDED write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i); @@ -1519,8 +1521,9 @@ static void setup_pw(void) #endif #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT - write_c0_pwctl(1 << 6 | psn); + pwctl |= (1 << 6 | psn); #endif + write_c0_pwctl(pwctl); write_c0_kpgd((long)swapper_pg_dir); kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */ } diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 46b76751f3a5..561154cbcc40 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) @@ -1803,7 +1804,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!prog->jit_requested || MIPS_ISA_REV < 2) + if (!prog->jit_requested) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index bcf7f559789a..c1b8c545ef92 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -322,16 +322,15 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask, struct bridge_irq_chip_data *data = d->chip_data; int bit = d->parent_data->hwirq; int pin = d->hwirq; - nasid_t nasid; int ret, cpu; ret = irq_chip_set_affinity_parent(d, mask, force); if (ret >= 0) { cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + data->nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); bridge_write(data->bc, b_int_addr[pin].addr, (((data->bc->intr_addr >> 30) & 0x30000) | - bit | (nasid << 8))); + bit | (data->nasid << 8))); bridge_read(data->bc, b_wid_tflush); } return ret; diff --git a/arch/mips/sgi-ip27/Kconfig b/arch/mips/sgi-ip27/Kconfig index ef3847e7aee0..e5b6cadbec85 100644 --- a/arch/mips/sgi-ip27/Kconfig +++ b/arch/mips/sgi-ip27/Kconfig @@ -38,10 +38,3 @@ config REPLICATE_KTEXT Say Y here to enable replicating the kernel text across multiple nodes in a NUMA cluster. This trades memory for speed. -config REPLICATE_EXHANDLERS - bool "Exception handler replication support" - depends on SGI_IP27 - help - Say Y here to enable replicating the kernel exception handlers - across multiple nodes in a NUMA cluster. This trades memory for - speed. diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 066b33f50bcc..db58ebf02870 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -69,23 +69,14 @@ static void per_hub_init(cnodeid_t cnode) hub_rtc_init(cnode); -#ifdef CONFIG_REPLICATE_EXHANDLERS - /* - * If this is not a headless node initialization, - * copy over the caliased exception handlers. - */ - if (get_compact_nodeid() == cnode) { - extern char except_vec2_generic, except_vec3_generic; - extern void build_tlb_refill_handler(void); - - memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80); - build_tlb_refill_handler(); - memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100); + if (nasid) { + /* copy exception handlers from first node to current node */ + memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0), + (void *)CKSEG0, 0x200); __flush_cache_all(); + /* switch to node local exception handlers */ + REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); } -#endif } void per_cpu_init(void) diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 37be04975831..79a2f6bd2b5a 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -73,6 +73,9 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask) int cpu; cpu = cpumask_first_and(mask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_any(cpu_online_mask); + nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); hd->cpu = cpu; if (!cputoslice(cpu)) { @@ -139,6 +142,7 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq, /* use CPU connected to nearest hub */ hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid)); setup_hub_mask(hd, &hub->h_cpus); + info->nasid = cpu_to_node(hd->cpu); /* Make sure it's not already pending when we connect it. */ REMOTE_HUB_CLR_INTR(info->nasid, swlevel); diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index fb077a947575..8624a885d95b 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -332,11 +332,7 @@ static void __init mlreset(void) * thinks it is a node 0 address. */ REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1)); -#ifdef CONFIG_REPLICATE_EXHANDLERS - REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); -#else REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0); -#endif #ifdef LATER /* diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c index adde79ce7fc0..dbd14ff05b4c 100644 --- a/arch/mips/tools/elf-entry.c +++ b/arch/mips/tools/elf-entry.c @@ -51,11 +51,14 @@ int main(int argc, const char *argv[]) nread = fread(&hdr, 1, sizeof(hdr), file); if (nread != sizeof(hdr)) { perror("Unable to read input file"); + fclose(file); return EXIT_FAILURE; } - if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) { + fclose(file); die("Input is not an ELF\n"); + } switch (hdr.ehdr32.e_ident[EI_CLASS]) { case ELFCLASS32: @@ -67,6 +70,7 @@ int main(int argc, const char *argv[]) entry = be32toh(hdr.ehdr32.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } @@ -83,14 +87,17 @@ int main(int argc, const char *argv[]) entry = be64toh(hdr.ehdr64.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } break; default: + fclose(file); die("Invalid ELF class\n"); } printf("0x%016" PRIx64 "\n", entry); + fclose(file); return EXIT_SUCCESS; } diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index e4a78571f883..c6481cfc5220 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -1166,13 +1166,13 @@ ENTRY(__sys_clone) l.movhi r29,hi(sys_clone) l.ori r29,r29,lo(sys_clone) l.j _fork_save_extra_regs_and_call - l.addi r7,r1,0 + l.nop ENTRY(__sys_fork) l.movhi r29,hi(sys_fork) l.ori r29,r29,lo(sys_fork) l.j _fork_save_extra_regs_and_call - l.addi r3,r1,0 + l.nop ENTRY(sys_rt_sigreturn) l.jal _sys_rt_sigreturn diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c index 43f140a28bc7..54d38809e22c 100644 --- a/arch/openrisc/kernel/stacktrace.c +++ b/arch/openrisc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <asm/processor.h> @@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long *sp = NULL; + if (!try_get_task_stack(tsk)) + return; + if (tsk == current) sp = (unsigned long *) &sp; - else - sp = (unsigned long *) KSTK_ESP(tsk); + else { + unsigned long ksp; + + /* Locate stack from kernel context */ + ksp = task_thread_info(tsk)->ksp; + ksp += STACK_FRAME_OVERHEAD; /* redzone */ + ksp += sizeof(struct pt_regs); + + sp = (unsigned long *) ksp; + } unwind_stack(trace, sp, save_stack_address_nosched); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 118953d41763..6dd4171c9530 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -212,6 +212,8 @@ atomic64_set(atomic64_t *v, s64 i) _atomic_spin_unlock_irqrestore(v, flags); } +#define atomic64_set_release(v, i) atomic64_set((v), (i)) + static __inline__ s64 atomic64_read(const atomic64_t *v) { diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h index dbaaca84f27f..640d46edf32e 100644 --- a/arch/parisc/include/asm/barrier.h +++ b/arch/parisc/include/asm/barrier.h @@ -26,6 +26,67 @@ #define __smp_rmb() mb() #define __smp_wmb() mb() +#define __smp_store_release(p, v) \ +do { \ + typeof(p) __p = (p); \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("stb,ma %0,0(%1)" \ + : : "r"(*(__u8 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("sth,ma %0,0(%1)" \ + : : "r"(*(__u16 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("stw,ma %0,0(%1)" \ + : : "r"(*(__u32 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("std,ma %0,0(%1)" \ + : : "r"(*(__u64 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("ldb,ma 0(%1),%0" \ + : "=r"(*(__u8 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("ldh,ma 0(%1),%0" \ + : "=r"(*(__u16 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("ldw,ma 0(%1),%0" \ + : "=r"(*(__u32 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("ldd,ma 0(%1),%0" \ + : "=r"(*(__u64 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + } \ + __u.__val; \ +}) #include <asm-generic/barrier.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index f627c37dad9c..068958575871 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ** if (((unsigned long)p & 0xf) == 0) ** return __ldcw(p); */ -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \ + __ret; \ +}) /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); @@ -54,6 +60,7 @@ extern void __cmpxchg_called_with_bad_pointer(void); extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new_); extern u64 __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new_); +extern u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new_); /* don't worry...optimizer will get rid of most of this */ static inline unsigned long @@ -65,6 +72,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size) #endif case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int)old, (unsigned int)new_); + case 1: return __cmpxchg_u8((u8 *)ptr, (u8)old, (u8)new_); } __cmpxchg_called_with_bad_pointer(); return old; diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 197d2247e4db..16aec9ba2580 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,12 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); -#ifdef CONFIG_SMP - (void) __ldcw(a); -#else - mb(); -#endif - *a = 1; + /* Release with ordered store. */ + __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 3b330e58a4f0..a5f3e50fe976 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -810,7 +810,7 @@ EXPORT_SYMBOL(device_to_hwpath); static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high, struct device *parent); -static void walk_lower_bus(struct parisc_device *dev) +static void __init walk_lower_bus(struct parisc_device *dev) { unsigned long io_io_low, io_io_high; @@ -889,8 +889,8 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - pr_info("%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", - ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, + pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); if (dev->num_addrs) { diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 89c801c2b5d1..d4acd0fce6f9 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -454,7 +454,6 @@ nop LDREG 0(\ptp),\pte bb,<,n \pte,_PAGE_PRESENT_BIT,3f - LDCW 0(\tmp),\tmp1 b \fault stw \spc,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) @@ -464,23 +463,26 @@ 3: .endm - /* Release pa_tlb_lock lock without reloading lock address. */ - .macro tlb_unlock0 spc,tmp,tmp1 + /* Release pa_tlb_lock lock without reloading lock address. + Note that the values in the register spc are limited to + NR_SPACE_IDS (262144). Thus, the stw instruction always + stores a nonzero value even when register spc is 64 bits. + We use an ordered store to ensure all prior accesses are + performed prior to releasing the lock. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP 98: or,COND(=) %r0,\spc,%r0 - LDCW 0(\tmp),\tmp1 - or,COND(=) %r0,\spc,%r0 - stw \spc,0(\tmp) + stw,ma \spc,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm /* Release pa_tlb_lock lock. */ - .macro tlb_unlock1 spc,tmp,tmp1 + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP 98: load_pa_tlb_lock \tmp 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) - tlb_unlock0 \spc,\tmp,\tmp1 + tlb_unlock0 \spc,\tmp #endif .endm @@ -1163,7 +1165,7 @@ dtlb_miss_20w: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1189,7 +1191,7 @@ nadtlb_miss_20w: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1223,7 +1225,7 @@ dtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1256,7 +1258,7 @@ nadtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1285,7 +1287,7 @@ dtlb_miss_20: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1313,7 +1315,7 @@ nadtlb_miss_20: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1420,7 +1422,7 @@ itlb_miss_20w: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1444,7 +1446,7 @@ naitlb_miss_20w: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1478,7 +1480,7 @@ itlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1502,7 +1504,7 @@ naitlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1532,7 +1534,7 @@ itlb_miss_20: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1552,7 +1554,7 @@ naitlb_miss_20: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1582,7 +1584,7 @@ dbit_trap_20w: idtlbt pte,prot - tlb_unlock0 spc,t0,t1 + tlb_unlock0 spc,t0 rfir nop #else @@ -1608,7 +1610,7 @@ dbit_trap_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock0 spc,t0,t1 + tlb_unlock0 spc,t0 rfir nop @@ -1628,7 +1630,7 @@ dbit_trap_20: idtlbt pte,prot - tlb_unlock0 spc,t0,t1 + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 97ac707c6bff..a37814cb66c7 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -640,11 +640,7 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -658,11 +654,7 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -863,11 +855,7 @@ cas2_action: cas2_end: /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -877,11 +865,7 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index 70ffbcf889b8..2e4d1f05a926 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -79,3 +79,15 @@ unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsign _atomic_spin_unlock_irqrestore(ptr, flags); return (unsigned long)prev; } + +u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new) +{ + unsigned long flags; + u8 prev; + + _atomic_spin_lock_irqsave(ptr, flags); + if ((prev = *ptr) == old) + *ptr = new; + _atomic_spin_unlock_irqrestore(ptr, flags); + return prev; +} diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ddca8287d43b..3e54484797f6 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -588,7 +588,7 @@ void __init mem_init(void) > BITS_PER_LONG); high_memory = __va((max_pfn << PAGE_SHIFT)); - set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1); + set_max_mapnr(max_low_pfn); memblock_free_all(); #ifdef CONFIG_PA11 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f7a363cbc1bb..1cbadb60612d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,13 +126,14 @@ config PPC select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV + select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64 - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -214,8 +215,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_FREE select HAVE_MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN @@ -230,11 +230,13 @@ config PPC select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND select PCI_DOMAINS if PCI select PCI_SYSCALL if PCI + select PPC_DAWR if PPC64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE @@ -371,6 +373,9 @@ config PPC_ADV_DEBUG_DAC_RANGE depends on PPC_ADV_DEBUG_REGS && 44x default y +config PPC_DAWR + bool + config ZONE_DMA bool default y if PPC_BOOK3E_64 @@ -716,6 +721,7 @@ config THREAD_SHIFT range 13 15 default "15" if PPC_256K_PAGES default "14" if PPC64 + default "14" if KASAN default "13" help Used to define the stack size. The default is almost always what you diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index c59920920ddc..b915fe658979 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -371,7 +371,7 @@ config PPC_PTDUMP config PPC_DEBUG_WX bool "Warn on W+X mappings at boot" - depends on PPC_PTDUMP + depends on PPC_PTDUMP && STRICT_KERNEL_RWX help Generate a warning if any W+X mappings are found at boot. diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index da9f9b48a63a..a120561f8433 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -91,11 +91,13 @@ MULTIPLEWORD := -mmultiple endif ifdef CONFIG_PPC64 +ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 endif +endif ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align @@ -142,6 +144,7 @@ endif endif CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) +ifndef CONFIG_CC_IS_CLANG ifdef CONFIG_CPU_LITTLE_ENDIAN CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) @@ -150,6 +153,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif +endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 83f8e5ba2722..1dc88ad66867 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -17,11 +17,11 @@ quiet_cmd_head_check = CHKHEAD $@ quiet_cmd_relocs_check = CHKREL $@ ifdef CONFIG_PPC_BOOK3S_64 cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \ + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" else cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif # `@true` prevents complaint when there is nothing to be done diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index 1699e9531552..00c4d843a023 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73d1f3562978..a6fd5d43a9ba 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -122,7 +122,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c -src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c +src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd..baa0c503e741 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c637..93095600e808 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e7175012..ff4bd38f0645 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214..1fa38ed6f59e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8..a8cc9780c0c4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c6348..8b8bd70c9382 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039..619c880b54d8 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec51075..d7ebb73a400d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367..b151d696a069 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602..adc0ae0013a3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74..435047e0e250 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266..c098657cca0a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117..9d06824815f3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa13634..70e947730c4b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee0..ad96e6529595 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb0..034bc4b71f7a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b15..93ca23d82b39 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc17..23b3117a2fd2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index 2abc8e83b95e..9757d4f6331e 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -6,6 +6,8 @@ #include <string.h> #define INT_MAX ((int)(~0U>>1)) +#define UINT32_MAX ((u32)~0U) +#define INT32_MAX ((s32)(UINT32_MAX >> 1)) #include "of.h" diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index b0491b8c0199..afef4fcdea98 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -129,7 +129,7 @@ int serial_console_init(void) dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_PPC_MPC52XX +#ifdef CONFIG_PPC_MPC52xx else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 9c63b596e6ce..a09595f00cab 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -28,7 +28,7 @@ static inline int arch_get_random_seed_int(unsigned int *v) unsigned long val; int rc; - rc = arch_get_random_long(&val); + rc = arch_get_random_seed_long(&val); if (rc) *v = val; diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 677e9babef80..6eb311eb818b 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_KUP_H #define _ASM_POWERPC_BOOK3S_32_KUP_H +#include <asm/bug.h> #include <asm/book3s/32/mmu-hash.h> #ifdef __ASSEMBLY__ @@ -75,7 +76,7 @@ .macro kuap_check current, gpr #ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr2, KUAP(thread) + lwz \gpr, THREAD + KUAP(\current) 999: twnei \gpr, 0 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) #endif @@ -91,6 +92,7 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) { + addr &= 0xf0000000; /* align addr to start of segment */ barrier(); /* make sure thread.kuap is updated before playing with SRs */ while (addr < end) { mtsrin(sr, addr); @@ -101,11 +103,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsrin() */ } -static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr, end; - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) return; addr = (__force u32)to; @@ -118,11 +122,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ } -static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void prevent_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr = (__force u32)to; u32 end = min(addr + size, TASK_SIZE); + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) + return; + if (!addr || addr >= TASK_SIZE || !size) return; @@ -130,12 +139,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from, kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + unsigned long begin = regs->kuap & 0xf0000000; + unsigned long end = regs->kuap << 28; + if (!is_write) return false; - return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); + return WARN(address < begin || address >= end, + "Bug: write fault blocked by segment registers !"); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 998317702630..dc5c039eb28e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 8fd8599c9395..3f9ae3585ab9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -156,6 +156,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int hash__has_transparent_hugepage(void); #endif +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + BUG(); + return pmd; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index d1d9177d9ebd..0729c034e56f 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -246,7 +246,7 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) == (_PAGE_PTE | H_PAGE_THP_HUGE)); } @@ -272,6 +272,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index f254de956d6a..c8d1076e0ebb 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -77,25 +77,27 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { // This is written so we can resolve to a single case at build time - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (dir == KUAP_READ) set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (__builtin_constant_p(from) && from == NULL) + else if (dir == KUAP_WRITE) set_kuap(AMR_KUAP_BLOCK_READ); else set_kuap(0); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 15b75005bc34..3fa1b962dc27 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -600,8 +600,11 @@ extern void slb_set_size(u16 size); * */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) + +// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2) + /* * For platforms that support on 65bit VA we limit the context bits */ diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index d5a44912902f..cae9e814593a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif void pte_frag_destroy(void *pte_frag); static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ccf00a8b98c6..e8b60bc8e7b9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -990,10 +990,25 @@ extern struct page *pgd_page(pgd_t pgd); #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) #define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) +static inline unsigned long pgd_index(unsigned long address) +{ + return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); +} + +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} /* * Find an entry in a page-table-directory. We combine the address region @@ -1295,7 +1310,9 @@ extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); + if (radix_enabled()) + return radix__pmd_mkdevmap(pmd); + return hash__pmd_mkdevmap(pmd); } static inline int pmd_devmap(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 574eca33f893..0efada5b4600 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -257,6 +257,11 @@ extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int radix__has_transparent_hugepage(void); #endif +static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); +} + extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 3d76e1c388c2..28c3d936fdf3 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -27,12 +27,12 @@ struct drmem_lmb_info { extern struct drmem_lmb_info *drmem_info; #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) <= (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); (lmb)++) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ &drmem_info->lmbs[0], \ - &drmem_info->lmbs[drmem_info->n_lmbs - 1]) + &drmem_info->lmbs[drmem_info->n_lmbs]) /* * The of_drconf_cell_v1 struct defines the layout of the LMB data diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 0cfc365d814b..722289a1d000 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -77,7 +77,12 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { - map_kernel_page(fix_to_virt(idx), phys, flags); + if (__builtin_constant_p(idx)) + BUILD_BUG_ON(idx >= __end_of_fixed_addresses); + else if (WARN_ON(idx >= __end_of_fixed_addresses)) + return; + + map_kernel_page(__fix_to_virt(idx), phys, flags); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index eea28ca679db..bc7d9d06a6d9 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + return ret; } diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 78202d5fb13a..67e2da195eae 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -76,18 +76,25 @@ static inline void hw_breakpoint_disable(void) extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); -extern int set_dawr(struct arch_hw_breakpoint *brk); +#else /* CONFIG_HAVE_HW_BREAKPOINT */ +static inline void hw_breakpoint_disable(void) { } +static inline void thread_change_pc(struct task_struct *tsk, + struct pt_regs *regs) { } + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + +#ifdef CONFIG_PPC_DAWR extern bool dawr_force_enable; static inline bool dawr_enabled(void) { return dawr_force_enable; } - -#else /* CONFIG_HAVE_HW_BREAKPOINT */ -static inline void hw_breakpoint_disable(void) { } -static inline void thread_change_pc(struct task_struct *tsk, - struct pt_regs *regs) { } +int set_dawr(struct arch_hw_breakpoint *brk); +#else static inline bool dawr_enabled(void) { return false; } -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; } +#endif + #endif /* __KERNEL__ */ #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 296e51c2f066..6db06f58deed 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -23,9 +23,7 @@ #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) -#define KASAN_SHADOW_END 0UL - -#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) +#define KASAN_SHADOW_END (-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT)) #ifdef CONFIG_KASAN void kasan_early_init(void); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 5b5e39643a27..94f24928916a 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,6 +2,10 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ +#define KUAP_READ 1 +#define KUAP_WRITE 2 +#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) + #ifdef CONFIG_PPC64 #include <asm/book3s/64/kup-radix.h> #endif @@ -42,32 +46,48 @@ void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) { } + unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) { } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } + unsigned long size, unsigned long dir) { } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) { - allow_user_access(NULL, from, size); + allow_user_access(NULL, from, size, KUAP_READ); } static inline void allow_write_to_user(void __user *to, unsigned long size) { - allow_user_access(to, NULL, size); + allow_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + allow_user_access(to, from, size, KUAP_READ_WRITE); } static inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size); + prevent_user_access(NULL, from, size, KUAP_READ); } static inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size); + prevent_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + prevent_user_access(to, from, size, KUAP_READ_WRITE); } #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_POWERPC_KUP_H_ */ +#endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index bb7c8cc77f1a..04b2b927bb5a 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm, */ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) { - return rcu_dereference_raw_notrace(kvm->memslots[0]); + return rcu_dereference_raw_check(kvm->memslots[0]); } extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 1c3133b5f86a..6fe97465e350 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -34,18 +34,19 @@ #include <asm/reg.h> static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_INIT); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 332b13b4ecdb..29c43665a753 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 0d52f57fca04..f3e5b3c73432 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -285,8 +285,13 @@ static inline bool pfn_valid(unsigned long pfn) /* * Some number of bits at the level of the page table that points to * a hugepte are used to encode the size. This masks those bits. + * On 8xx, HW assistance requires 4k alignment for the hugepte. */ +#ifdef CONFIG_PPC_8xx +#define HUGEPD_SHIFT_MASK 0xfff +#else #define HUGEPD_SHIFT_MASK 0x3f +#endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index dce863a7635c..8e5b7d0b851c 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -10,8 +10,6 @@ #ifdef CONFIG_SMP -#include <asm/paca.h> - #define __my_cpu_offset local_paca->data_offset #endif /* CONFIG_SMP */ @@ -19,4 +17,6 @@ #include <asm-generic/percpu.h> +#include <asm/paca.h> + #endif /* _ASM_POWERPC_PERCPU_H_ */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 7426d7a90e1e..7aba3c7ea25c 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -12,6 +12,8 @@ #ifdef CONFIG_PPC_PERF_CTRS #include <asm/perf_event_server.h> +#else +static inline bool is_sier_available(void) { return false; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 64145751b2fd..f7ee707734bc 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -144,13 +144,9 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p) #define is_ioremap_addr is_ioremap_addr static inline bool is_ioremap_addr(const void *x) { -#ifdef CONFIG_MMU unsigned long addr = (unsigned long)x; return addr >= IOREMAP_BASE && addr < IOREMAP_END; -#else - return false; -#endif } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ef573fe9873e..c92795f55ffb 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -291,7 +291,6 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .addr_limit = KERNEL_DS, \ .fpexc_mode = 0, \ .fscr = FSCR_TAR | FSCR_EBB \ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3c1887351c71..bd227e0eab07 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -368,8 +368,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_online_cpus_mask(cpumask_var_t cpus); -extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index ccf44c135389..7c05e95a5c44 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -9,7 +9,7 @@ #define _ASM_POWERPC_SECURITY_FEATURES_H -extern unsigned long powerpc_security_features; +extern u64 powerpc_security_features; extern bool rfi_flush; /* These are bit flags */ @@ -24,17 +24,17 @@ void setup_stf_barrier(void); void do_stf_barrier_fixups(enum stf_barrier_type types); void setup_count_cache_flush(void); -static inline void security_ftr_set(unsigned long feature) +static inline void security_ftr_set(u64 feature) { powerpc_security_features |= feature; } -static inline void security_ftr_clear(unsigned long feature) +static inline void security_ftr_clear(u64 feature) { powerpc_security_features &= ~feature; } -static inline bool security_ftr_enabled(unsigned long feature) +static inline bool security_ftr_enabled(u64 feature) { return !!(powerpc_security_features & feature); } diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index d995061f5f86..f7d9b9f5c7ff 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -7,7 +7,9 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *); -extern void longjmp(long *, long); +typedef long jmp_buf[JMP_BUF_LEN]; + +extern int setjmp(jmp_buf env); +extern void longjmp(jmp_buf env, int val); #endif /* _ASM_POWERPC_SETJMP_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index a47f827bc5f1..ff0dffb19aea 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -15,6 +15,7 @@ * * (the type definitions are in asm/spinlock_types.h) */ +#include <linux/jump_label.h> #include <linux/irqflags.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -36,10 +37,12 @@ #endif #ifdef CONFIG_PPC_PSERIES +DECLARE_STATIC_KEY_FALSE(shared_processor); + #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + if (!static_branch_unlikely(&shared_processor)) return false; return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); } diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b2c0be93929d..7f3a8b902325 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -26,6 +26,17 @@ #define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); +/* + * book3s: + * Hash does not use the linux page-tables, so we can avoid + * the TLB invalidate for page-table freeing, Radix otoh does use the + * page-tables and needs the TLBI. + * + * nohash: + * We still do TLB invalidate in the __pte_free_tlb routine before we + * add the page table pages to mmu gather table batch. + */ +#define tlb_needs_table_invalidate() radix_enabled() /* Get the generic bits... */ #include <asm-generic/tlb.h> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 8b03eb44e876..d02ac21558b4 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -313,9 +313,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) unsigned long ret; barrier_nospec(); - allow_user_access(to, from, n); + allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); - prevent_user_access(to, from, n); + prevent_read_write_user(to, from, n); return ret; } #endif /* __powerpc64__ */ @@ -387,7 +387,7 @@ static inline unsigned long raw_copy_to_user(void __user *to, return ret; } -extern unsigned long __clear_user(void __user *addr, unsigned long size); +unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { @@ -395,12 +395,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(addr, size))) { allow_write_to_user(addr, size); - ret = __clear_user(addr, size); + ret = __arch_clear_user(addr, size); prevent_write_to_user(addr, size); } return ret; } +static inline unsigned long __clear_user(void __user *addr, unsigned long size) +{ + return clear_user(addr, size); +} + extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index c61d59ed3b45..2ccb938d8544 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -82,6 +82,7 @@ struct vdso_data { __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; @@ -103,6 +104,7 @@ struct vdso_data { __s32 wtom_clock_nsec; struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 dcache_block_size; /* L1 d-cache block size */ __u32 icache_block_size; /* L1 i-cache block size */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index f2dfcd50a2d3..33aee7490cbb 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -39,6 +39,7 @@ #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 +#define XIVE_ESB_INVALID 0xFF /* * Thread Management (aka "TM") registers diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 64a7d8f693ff..447b7d935f4c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -7,9 +7,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' $(call cc-disable-warning, CFLAGS_ptrace.o += $(call cc-disable-warning, array-bounds) CFLAGS_syscalls.o += $(call cc-disable-warning, attribute-alias) -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -59,6 +56,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 31dc7e64cbfc..31e501383125 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -385,6 +385,7 @@ int main(void) OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); + OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); @@ -415,7 +416,6 @@ int main(void) DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index bfe5f4a2886b..3b7cb01a4139 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2191,11 +2191,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Futhermore, let's ensure that the + * fix for the PMAO bug is enabled on compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c new file mode 100644 index 000000000000..5f66b95b6858 --- /dev/null +++ b/arch/powerpc/kernel/dawr.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * DAWR infrastructure + * + * Copyright 2019, Michael Neuling, IBM Corporation. + */ + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <asm/debugfs.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> + +bool dawr_force_enable; +EXPORT_SYMBOL_GPL(dawr_force_enable); + +int set_dawr(struct arch_hw_breakpoint *brk) +{ + unsigned long dawr, dawrx, mrd; + + dawr = brk->address; + + dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) + << (63 - 58); + dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59); + dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3; + /* + * DAWR length is stored in field MDR bits 48:53. Matches range in + * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and + * 0b111111=64DW. + * brk->len is in bytes. + * This aligns up to double word size, shifts and does the bias. + */ + mrd = ((brk->len + 7) >> 3) - 1; + dawrx |= (mrd & 0x3f) << (63 - 53); + + if (ppc_md.set_dawr) + return ppc_md.set_dawr(dawr, dawrx); + + mtspr(SPRN_DAWR, dawr); + mtspr(SPRN_DAWRX, dawrx); + + return 0; +} + +static void set_dawr_cb(void *info) +{ + set_dawr(info); +} + +static ssize_t dawr_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct arch_hw_breakpoint null_brk = {0, 0, 0}; + size_t rc; + + /* Send error to user if they hypervisor won't allow us to write DAWR */ + if (!dawr_force_enable && + firmware_has_feature(FW_FEATURE_LPAR) && + set_dawr(&null_brk) != H_SUCCESS) + return -ENODEV; + + rc = debugfs_write_file_bool(file, user_buf, count, ppos); + if (rc) + return rc; + + /* If we are clearing, make sure all CPUs have the DAWR cleared */ + if (!dawr_force_enable) + smp_call_function(set_dawr_cb, &null_brk, 0); + + return rc; +} + +static const struct file_operations dawr_enable_fops = { + .read = debugfs_read_file_bool, + .write = dawr_write_file_bool, + .open = simple_open, + .llseek = default_llseek, +}; + +static int __init dawr_force_setup(void) +{ + if (cpu_has_feature(CPU_FTR_DAWR)) { + /* Don't setup sysfs file for user control on P8 */ + dawr_force_enable = true; + return 0; + } + + if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { + /* Turn DAWR off by default, but allow admin to turn it on */ + debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, + powerpc_debugfs_root, + &dawr_force_enable, + &dawr_enable_fops); + } + return 0; +} +arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 864cc55fa03c..cf55c756f829 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void) /* Initialize the base environment -- clear FSCR/HFSCR. */ hv_mode = !!(mfmsr() & MSR_HV); if (hv_mode) { - /* CPU_FTR_HVMODE is used early in PACA setup */ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; mtspr(SPRN_HFSCR, 0); } @@ -346,6 +345,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) { u64 lpcr; + /* + * Linux relies on FSCR[DSCR] being clear, so that we can take the + * facility unavailable interrupt and track the task's usage of DSCR. + * See facility_unavailable_exception(). + * Clear the bit here so that feat_enable() doesn't set it. + */ + f->fscr_bit_nr = -1; + feat_enable(f); lpcr = mfspr(SPRN_LPCR); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index fe0c32fb9f96..d147bc212405 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -540,12 +540,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_entry, &rmv_data->removed_vf_list); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 85fdb6d879f1..2c46a45f53e6 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -179,7 +179,7 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ - kuap_save_and_lock r11, r12, r9, r2, r0 + kuap_save_and_lock r11, r12, r9, r2, r6 addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ @@ -284,6 +284,7 @@ reenable_mmu: rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) b fast_exception_return #endif @@ -696,7 +697,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ - kuap_check r2, r4 + kuap_check r2, r0 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3e564536a237..7c07c66cd80b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -424,17 +424,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common) bl machine_check_queue_event /* - * We have not used any non-volatile GPRs here, and as a rule - * most exception code including machine check does not. - * Therefore PACA_NAPSTATELOST does not need to be set. Idle - * wakeup will restore volatile registers. + * GPR-loss wakeups are relatively straightforward, because the + * idle sleep code has saved all non-volatile registers on its + * own stack, and r1 in PACAR1. * - * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * For no-loss wakeups the r1 and lr registers used by the + * early machine check handler have to be restored first. r2 is + * the kernel TOC, so no need to restore it. * * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) ld r4,_LINK(r1) + ld r1,GPR1(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 @@ -443,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common) mtlr r4 rlwinm r10,r3,47-31,30,31 cmpwi cr1,r10,2 - bltlr cr1 /* no state loss, return to idle caller */ + bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ b idle_return_gpr_loss #endif /* diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index b5a5c6896019..b838f08929cb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -900,6 +900,7 @@ p_toc: .8byte __toc_start + 0x8000 - 0b /* * This is where the main kernel code starts. */ +__REF start_here_multiplatform: /* set up the TOC */ bl relative_toc @@ -943,15 +944,8 @@ start_here_multiplatform: std r0,0(r4) #endif - /* The following gets the stack set up with the regs */ - /* pointing to the real addr of the kernel stack. This is */ - /* all done to support the C function call below which sets */ - /* up the htab. This is done because we have relocated the */ - /* kernel but are still running in real mode. */ - - LOAD_REG_ADDR(r3,init_thread_union) - /* set up a stack pointer */ + LOAD_REG_ADDR(r3,init_thread_union) LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 @@ -975,6 +969,7 @@ start_here_multiplatform: RFI b . /* prevent speculative execution */ + .previous /* This is where all platforms converge execution */ start_here_common: diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5ab9178c2347..acd038b59dd3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -288,7 +288,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */ + rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index adf0505dbe02..519d49547e2f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -238,6 +238,9 @@ set_ivor: bl early_init +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif #ifdef CONFIG_RELOCATABLE mr r3,r30 mr r4,r31 @@ -264,9 +267,6 @@ set_ivor: /* * Decide what sort of machine this is and initialize the MMU. */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif mr r3,r30 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 50262597c222..c8d1fa2e9d53 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -366,64 +366,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } - -bool dawr_force_enable; -EXPORT_SYMBOL_GPL(dawr_force_enable); - -static void set_dawr_cb(void *info) -{ - set_dawr(info); -} - -static ssize_t dawr_write_file_bool(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct arch_hw_breakpoint null_brk = {0, 0, 0}; - size_t rc; - - /* Send error to user if they hypervisor won't allow us to write DAWR */ - if ((!dawr_force_enable) && - (firmware_has_feature(FW_FEATURE_LPAR)) && - (set_dawr(&null_brk) != H_SUCCESS)) - return -1; - - rc = debugfs_write_file_bool(file, user_buf, count, ppos); - if (rc) - return rc; - - /* If we are clearing, make sure all CPUs have the DAWR cleared */ - if (!dawr_force_enable) - smp_call_function(set_dawr_cb, &null_brk, 0); - - return rc; -} - -static const struct file_operations dawr_enable_fops = { - .read = debugfs_read_file_bool, - .write = dawr_write_file_bool, - .open = simple_open, - .llseek = default_llseek, -}; - -static int __init dawr_force_setup(void) -{ - dawr_force_enable = false; - - if (cpu_has_feature(CPU_FTR_DAWR)) { - /* Don't setup sysfs file for user control on P8 */ - dawr_force_enable = true; - return 0; - } - - if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { - /* Turn DAWR off by default, but allow admin to turn it on */ - dawr_force_enable = false; - debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, - powerpc_debugfs_root, - &dawr_force_enable, - &dawr_enable_fops); - } - return 0; -} -arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5645bc9cbc09..add67498c126 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs) trace_irq_entry(regs); - check_stack_overflow(); - /* * Query the platform PIC for the interrupt & ack it. * @@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; + check_stack_overflow(); + /* Already there ? */ if (unlikely(cursp == irqsp || cursp == sirqsp)) { __do_irq(regs); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2d27ec4feee4..9b340af02c38 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -264,6 +264,9 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; + if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index c4ed328a7b96..7a1c11a7cba5 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -114,11 +114,12 @@ void machine_kexec(struct kimage *image) void __init reserve_crashkernel(void) { - unsigned long long crash_size, crash_base; + unsigned long long crash_size, crash_base, total_mem_sz; int ret; + total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size(); /* use common parsing */ - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -177,6 +178,7 @@ void __init reserve_crashkernel(void) /* Crash kernel trumps memory limit */ if (memory_limit && memory_limit <= crashk_res.end) { memory_limit = crashk_res.end + 1; + total_mem_sz = memory_limit; printk("Adjusted memory limit for crashkernel, now 0x%llx\n", memory_limit); } @@ -185,7 +187,7 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start >> 20), - (unsigned long)(memblock_phys_mem_size() >> 20)); + (unsigned long)(total_mem_sz >> 20)); if (!memblock_is_region_memory(crashk_res.start, crash_size) || memblock_reserve(crashk_res.start, crash_size)) { diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index d4d096f80f4b..19b5c4bb7d01 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -82,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 1: dcbst 0,r6 @@ -98,7 +98,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 2: icbi 0,r6 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index e3ad8aa4730d..01800e5c4118 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -134,7 +134,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init initialise_paca(struct paca_struct *new_paca, int cpu) +void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -163,7 +163,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) } /* Put the paca pointer into r13 and SPRG_PACA */ -void setup_paca(struct paca_struct *new_paca) +void __nostackprotector setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; @@ -172,11 +172,15 @@ void setup_paca(struct paca_struct *new_paca) /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); #else - /* In HV mode, we setup both HPACA and PACA to avoid problems + /* + * In HV mode, we setup both HPACA and PACA to avoid problems * if we do a GET_PACA() before the feature fixups have been - * applied + * applied. + * + * Normally you should test against CPU_FTR_HVMODE, but CPU features + * are not yet set up when we first reach here. */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) + if (mfmsr() & MSR_HV) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index c4c8c237a106..eca72c460151 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -244,9 +244,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible to removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4f9bfe9fd960..642b248ff41f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -780,34 +780,6 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) return __set_dabr(dabr, dabrx); } -int set_dawr(struct arch_hw_breakpoint *brk) -{ - unsigned long dawr, dawrx, mrd; - - dawr = brk->address; - - dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \ - << (63 - 58); //* read/write bits */ - dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \ - << (63 - 59); //* translate */ - dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \ - >> 3; //* PRIM bits */ - /* dawr length is stored in field MDR bits 48:53. Matches range in - doublewords (64 bits) baised by -1 eg. 0b000000=1DW and - 0b111111=64DW. - brk->len is in bytes. - This aligns up to double word size, shifts and does the bias. - */ - mrd = ((brk->len + 7) >> 3) - 1; - dawrx |= (mrd & 0x3f) << (63 - 53); - - if (ppc_md.set_dawr) - return ppc_md.set_dawr(dawr, dawrx); - mtspr(SPRN_DAWR, dawr); - mtspr(SPRN_DAWRX, dawrx); - return 0; -} - void __set_breakpoint(struct arch_hw_breakpoint *brk) { memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7159e791a70d..0d1a67b3e8da 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -684,6 +684,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC64 +static void __init save_fscr_to_task(void) +{ + /* + * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we + * have configured via the device tree features or via __init_FSCR(). + * That value will then be propagated to pid 1 (init) and all future + * processes. + */ + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + init_task.thread.fscr = mfspr(SPRN_FSCR); +} +#else +static inline void save_fscr_to_task(void) {}; +#endif + + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -769,6 +786,8 @@ void __init early_init_devtree(void *params) BUG(); } + save_fscr_to_task(); + #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are * NCPUS-1 non-boot CPUs :-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 6d600c977c09..e1186a0deb23 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1037,7 +1037,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, - .byte22 = OV5_FEAT(OV5_DRMEM_V2), + .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO), .intarch = 0, .mmu = 0, .hash_ext = 0, diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 65cd96c3b1d6..5e468938a661 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -841,97 +841,6 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -enum rtas_cpu_state { - DOWN, - UP, -}; - -#ifndef CONFIG_SMP -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - if (!cpumask_empty(cpus)) { - cpumask_clear(cpus); - return -EINVAL; - } else - return 0; -} -#else -/* On return cpumask will be altered to indicate CPUs changed. - * CPUs with states changed will be set in the mask, - * CPUs with status unchanged will be unset in the mask. */ -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - int cpu; - int cpuret = 0; - int ret = 0; - - if (cpumask_empty(cpus)) - return 0; - - for_each_cpu(cpu, cpus) { - struct device *dev = get_cpu_device(cpu); - - switch (state) { - case DOWN: - cpuret = device_offline(dev); - break; - case UP: - cpuret = device_online(dev); - break; - } - if (cpuret < 0) { - pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", - __func__, - ((state == UP) ? "up" : "down"), - cpu, cpuret); - if (!ret) - ret = cpuret; - if (state == UP) { - /* clear bits for unchanged cpus, return */ - cpumask_shift_right(cpus, cpus, cpu); - cpumask_shift_left(cpus, cpus, cpu); - break; - } else { - /* clear bit for unchanged cpu, continue */ - cpumask_clear_cpu(cpu, cpus); - } - } - } - - return ret; -} -#endif - -int rtas_online_cpus_mask(cpumask_var_t cpus) -{ - int ret; - - ret = rtas_cpu_state_change_mask(UP, cpus); - - if (ret) { - cpumask_var_t tmp_mask; - - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - return ret; - - /* Use tmp_mask to preserve cpus mask from first failure */ - cpumask_copy(tmp_mask, cpus); - rtas_offline_cpus_mask(tmp_mask); - free_cpumask_var(tmp_mask); - } - - return ret; -} -EXPORT_SYMBOL(rtas_online_cpus_mask); - -int rtas_offline_cpus_mask(cpumask_var_t cpus) -{ - return rtas_cpu_state_change_mask(DOWN, cpus); -} -EXPORT_SYMBOL(rtas_offline_cpus_mask); - int rtas_ibm_suspend_me(u64 handle) { long state; @@ -939,8 +848,6 @@ int rtas_ibm_suspend_me(u64 handle) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); - cpumask_var_t offline_mask; - int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -961,9 +868,6 @@ int rtas_ibm_suspend_me(u64 handle) return -EIO; } - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); @@ -972,24 +876,8 @@ int rtas_ibm_suspend_me(u64 handle) lock_device_hotplug(); - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); - cpuret = rtas_online_cpus_mask(offline_mask); - if (cpuret) { - pr_err("%s: Could not bring present CPUs online.\n", __func__); - atomic_set(&data.error, cpuret); - goto out; - } - cpu_hotplug_disable(); - /* Check if we raced with a CPU-Offline Operation */ - if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { - pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); - atomic_set(&data.error, -EAGAIN); - goto out_hotplug_enable; - } - /* Call function on all CPUs. One of us will make the * rtas call */ @@ -1001,18 +889,11 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); -out_hotplug_enable: - cpu_hotplug_enable(); - /* Take down CPUs not online prior to suspend */ - cpuret = rtas_offline_cpus_mask(offline_mask); - if (cpuret) - pr_warn("%s: Could not restore CPUs to offline state.\n", - __func__); + cpu_hotplug_enable(); -out: unlock_device_hotplug(); - free_cpumask_var(offline_mask); + return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bd91dceb7010..1740a66cea84 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,7 +16,7 @@ #include <asm/setup.h> -unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; enum count_cache_flush_type { COUNT_CACHE_FLUSH_NONE = 0x1, @@ -109,7 +109,7 @@ device_initcall(barrier_nospec_debugfs_init); static __init int security_feature_debugfs_init(void) { debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, - (u64 *)&powerpc_security_features); + &powerpc_security_features); return 0; } device_initcall(security_feature_debugfs_init); @@ -142,32 +142,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); - if (rfi_flush || thread_priv) { + if (rfi_flush) { struct seq_buf s; seq_buf_init(&s, buf, PAGE_SIZE - 1); - seq_buf_printf(&s, "Mitigation: "); - - if (rfi_flush) - seq_buf_printf(&s, "RFI Flush"); - - if (rfi_flush && thread_priv) - seq_buf_printf(&s, ", "); - + seq_buf_printf(&s, "Mitigation: RFI Flush"); if (thread_priv) - seq_buf_printf(&s, "L1D private per thread"); + seq_buf_printf(&s, ", L1D private per thread"); seq_buf_printf(&s, "\n"); return s.len; } + if (thread_priv) + return sprintf(buf, "Vulnerable: L1D private per thread\n"); + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_meltdown(dev, attr, buf); +} #endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index c82577c4b15d..1b02d338a5f5 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,6 +8,12 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H +#ifdef CONFIG_CC_IS_CLANG +#define __nostackprotector +#else +#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) +#endif + void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 44b4c432a273..e50fbed36651 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -284,24 +284,42 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. */ -void __init early_setup(unsigned long dt_ptr) +void __init __nostackprotector early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Try new device tree based feature discovery ... */ - if (!dt_cpu_ftrs_init(__va(dt_ptr))) - /* Otherwise use the old style CPU table */ - identify_cpu(0, mfspr(SPRN_PVR)); - - /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ + /* + * Assume we're on cpu 0 for now. + * + * We need to load a PACA very early for a few reasons. + * + * The stack protector canary is stored in the paca, so as soon as we + * call any stack protected code we need r13 pointing somewhere valid. + * + * If we are using kcov it will call in_task() in its instrumentation, + * which relies on the current task from the PACA. + * + * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as + * printk(), which can trigger both stack protector and kcov. + * + * percpu variables and spin locks also use the paca. + * + * So set up a temporary paca. It will be replaced below once we know + * what CPU we are on. + */ initialise_paca(&boot_paca, 0); setup_paca(&boot_paca); fixup_boot_paca(); /* -------- printk is now safe to use ------- */ + /* Try new device tree based feature discovery ... */ + if (!dt_cpu_ftrs_init(__va(dt_ptr))) + /* Otherwise use the old style CPU table */ + identify_cpu(0, mfspr(SPRN_PVR)); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); @@ -523,6 +541,8 @@ static bool __init parse_cache_info(struct device_node *np, lsizep = of_get_property(np, propnames[3], NULL); if (bsizep == NULL) bsizep = lsizep; + if (lsizep == NULL) + lsizep = bsizep; if (lsizep != NULL) lsize = be32_to_cpu(*lsizep); if (bsizep != NULL) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e6c30cee6abf..d215f9554553 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) * normal/non-checkpointed stack pointer. */ + unsigned long ret = tsk->thread.regs->gpr[1]; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) - return tsk->thread.ckpt_regs.gpr[1]; + ret = tsk->thread.ckpt_regs.gpr[1]; + + /* + * If we treclaim, we must clear the current thread's TM bits + * before re-enabling preemption. Otherwise we might be + * preempted and have the live MSR[TS] changed behind our back + * (tm_recheckpoint_new_task() would recheckpoint). Besides, we + * enter the signal handler in non-transactional state. + */ + tsk->thread.regs->msr &= ~MSR_TS_MASK; + preempt_enable(); } #endif - return tsk->thread.regs->gpr[1]; + return ret; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ebb78effd280..6c18d58eca11 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, */ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret) + struct mcontext __user *tm_frame, int sigret, + unsigned long msr) { - unsigned long msr = regs->msr; - WARN_ON(tm_suspend_disabled); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) goto badframe; } else @@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret)) + sigret, msr)) goto badframe; } else diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bee704f32f96..a521f82a789a 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, struct task_struct *tsk, - int signr, sigset_t *set, unsigned long handler) + int signr, sigset_t *set, unsigned long handler, + unsigned long msr) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); - BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + BUG_ON(!MSR_TM_ACTIVE(msr)); WARN_ON(tm_suspend_disabled); @@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); @@ -480,8 +473,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); + /* Don't allow userspace to set the trap value */ + regs->trap = 0; + /* These regs are not checkpointed; they can go in 'regs'. */ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); @@ -824,6 +819,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -841,7 +840,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= __put_user(0, &frame->uc.uc_flags); err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ @@ -849,7 +848,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, - (unsigned long)ksig->ka.sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler, + msr); } else #endif { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 694522308cd5..0e0a2227af7d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb) * Accumulate stolen time by scanning the dispatch trace log. * Called on entry from user mode. */ -void accumulate_stolen_time(void) +void notrace accumulate_stolen_time(void) { u64 sst, ust; unsigned long save_irq_soft_mask = irq_soft_mask_return(); @@ -522,35 +522,6 @@ static inline void clear_irq_work_pending(void) "i" (offsetof(struct paca_struct, irq_work_pending))); } -void arch_irq_work_raise(void) -{ - preempt_disable(); - set_irq_work_pending_flag(); - /* - * Non-nmi code running with interrupts disabled will replay - * irq_happened before it re-enables interrupts, so setthe - * decrementer there instead of causing a hardware exception - * which would immediately hit the masked interrupt handler - * and have the net effect of setting the decrementer in - * irq_happened. - * - * NMI interrupts can not check this when they return, so the - * decrementer hardware exception is raised, which will fire - * when interrupts are next enabled. - * - * BookE does not support this yet, it must audit all NMI - * interrupt handlers to ensure they call nmi_enter() so this - * check would be correct. - */ - if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) { - set_dec(1); - } else { - hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_DEC; - } - preempt_enable(); -} - #else /* 32-bit */ DEFINE_PER_CPU(u8, irq_work_pending); @@ -559,16 +530,27 @@ DEFINE_PER_CPU(u8, irq_work_pending); #define test_irq_work_pending() __this_cpu_read(irq_work_pending) #define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) +#endif /* 32 vs 64 bit */ + void arch_irq_work_raise(void) { + /* + * 64-bit code that uses irq soft-mask can just cause an immediate + * interrupt here that gets soft masked, if this is called under + * local_irq_disable(). It might be possible to prevent that happening + * by noticing interrupts are disabled and setting decrementer pending + * to be replayed when irqs are enabled. The problem there is that + * tracing can call irq_work_raise, including in code that does low + * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on) + * which could get tangled up if we're messing with the same state + * here. + */ preempt_disable(); set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#endif /* 32 vs 64 bit */ - #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 @@ -959,6 +941,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; vdso_data->stamp_xtime = xt; vdso_data->stamp_sec_fraction = frac_sec; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++(vdso_data->tb_update_count); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c8ea3a253b81..88e1c6892648 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, } NOKPROBE_SYMBOL(oops_end); +static char *get_mmu_str(void) +{ + if (early_radix_enabled()) + return " MMU=Radix"; + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return " MMU=Hash"; + return ""; +} + static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", - PAGE_SIZE / 1024, - early_radix_enabled() ? " MMU=Radix" : "", - early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "", + PAGE_SIZE / 1024, get_mmu_str(), IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d60598113a9f..36849b693d86 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -704,7 +704,7 @@ int vdso_getcpu_init(void) node = cpu_to_node(cpu); WARN_ON_ONCE(node > 0xffff); - val = (cpu & 0xfff) | ((node & 0xffff) << 16); + val = (cpu & 0xffff) | ((node & 0xffff) << 16); mtspr(SPRN_SPRG_VDSO_WRITE, val); get_paca()->sprg_vdso = val; diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index becd9f8767ed..a967e795b96d 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -156,12 +156,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl __get_datapage@local /* get data page */ + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpli cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l stw r3,TSPC32_TV_SEC(r4) stw r5,TSPC32_TV_NSEC(r4) blr diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 3f92561a64c4..526f5ba2593e 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 07bfe33fe874..81757f06bbd7 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpldi cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l std r3,TSPC64_TV_SEC(r4) std r5,TSPC64_TV_NSEC(r4) blr diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 060a1acd7c6d..4638d2863388 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -326,6 +326,12 @@ SECTIONS *(.branch_lt) } +#ifdef CONFIG_DEBUG_INFO_BTF + .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { + *(.BTF) + } +#endif + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { __start_opd = .; KEEP(*(.opd)) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f53997a8ca62..b8e13d5a4a31 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM_BOOK3S_32_HANDLER config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER + select PPC_DAWR_FORCE_ENABLE config KVM_BOOK3S_PR_POSSIBLE bool diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 08b2dfbc5305..56dbc2f6e3d2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -38,7 +38,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, - __pa(to), __pa(from), n); + (to != NULL) ? __pa(to): 0, + (from != NULL) ? __pa(from): 0, n); quadrant = 1; if (!pid) @@ -353,7 +354,13 @@ static struct kmem_cache *kvm_pmd_cache; static pte_t *kvmppc_pte_alloc(void) { - return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + pte_t *pte; + + pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + /* pmd_populate() will only reference _pa(pte). */ + kmemleak_ignore(pte); + + return pte; } static void kvmppc_pte_free(pte_t *ptep) @@ -369,7 +376,13 @@ static inline int pmd_is_leaf(pmd_t pmd) static pmd_t *kvmppc_pmd_alloc(void) { - return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + pmd_t *pmd; + + pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + /* pud_populate() will only reference _pa(pmd). */ + kmemleak_ignore(pmd); + + return pmd; } static void kvmppc_pmd_free(pmd_t *pmdp) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 446d91d6cf70..2ae6ee1d5755 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -110,6 +110,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct kvmppc_spapr_tce_iommu_table *stit, *tmp; struct iommu_table_group *table_group = NULL; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { table_group = iommu_group_get_iommudata(grp); @@ -124,7 +125,9 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, kref_put(&stit->kref, kvm_spapr_tce_liobn_put); } } + cond_resched_rcu(); } + rcu_read_unlock(); } extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, @@ -142,12 +145,14 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!f.file) return -EBADF; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { if (stt == f.file->private_data) { found = true; break; } } + rcu_read_unlock(); fdput(f); @@ -180,6 +185,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!tbl) return -EINVAL; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { if (tbl != stit->tbl) continue; @@ -187,14 +193,17 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!kref_get_unless_zero(&stit->kref)) { /* stit is being destroyed */ iommu_tce_table_put(tbl); + rcu_read_unlock(); return -ENOTTY; } /* * The table is already known to this KVM, we just increased * its KVM reference counter and can return. */ + rcu_read_unlock(); return 0; } + rcu_read_unlock(); stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { @@ -400,18 +409,19 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { unsigned long hpa = 0; struct mm_iommu_table_group_mem_t *mem; long shift = stit->tbl->it_page_shift; mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift); - if (!mem) - return H_TOO_HARD; - - if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) + if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) { + rcu_read_unlock(); return H_TOO_HARD; + } } + rcu_read_unlock(); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f8975c620f41..cc4279bc6da6 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2351,7 +2351,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + goto uninit_vcpu; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2368,6 +2368,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu); out: @@ -3618,6 +3620,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { kvmppc_nested_cede(vcpu); + kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } } else { diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7..3f6ad3f58628 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1769,10 +1769,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; return vcpu; +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a3f9c665bb5b..baa740815b3c 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2005,6 +2005,10 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) pr_devel("Creating xive for partition\n"); + /* Already there ? */ + if (kvm->arch.xive) + return -EEXIST; + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; @@ -2014,12 +2018,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->kvm = kvm; mutex_init(&xive->lock); - /* Already there ? */ - if (kvm->arch.xive) - ret = -EEXIST; - else - kvm->arch.xive = xive; - /* We use the default queue size set by the host */ xive->q_order = xive_native_default_eq_shift(); if (xive->q_order < PAGE_SHIFT) @@ -2039,6 +2037,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 78b906ffa0d2..235d57d6c205 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) } } +static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q, + u8 prio, __be32 *qpage, + u32 order, bool can_escalate) +{ + int rc; + __be32 *qpage_prev = q->qpage; + + rc = xive_native_configure_queue(vp_id, q, prio, qpage, order, + can_escalate); + if (rc) + return rc; + + if (qpage_prev) + put_page(virt_to_page(qpage_prev)); + + return rc; +} + void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -582,19 +600,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, q->guest_qaddr = 0; q->guest_qshift = 0; - rc = xive_native_configure_queue(xc->vp_id, q, priority, - NULL, 0, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + NULL, 0, true); if (rc) { pr_err("Failed to reset queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); return rc; } - if (q->qpage) { - put_page(virt_to_page(q->qpage)); - q->qpage = NULL; - } - return 0; } @@ -624,17 +637,18 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) { + + page_size = kvm_host_page_size(vcpu, gfn); + if (1ull << kvm_eq.qshift > page_size) { srcu_read_unlock(&kvm->srcu, srcu_idx); - pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); + pr_warn("Incompatible host page size %lx!\n", page_size); return -EINVAL; } - page_size = kvm_host_page_size(kvm, gfn); - if (1ull << kvm_eq.qshift > page_size) { + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { srcu_read_unlock(&kvm->srcu, srcu_idx); - pr_warn("Incompatible host page size %lx!\n", page_size); + pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); return -EINVAL; } @@ -653,8 +667,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, * OPAL level because the use of END ESBs is not supported by * Linux. */ - rc = xive_native_configure_queue(xc->vp_id, q, priority, - (__be32 *) qaddr, kvm_eq.qshift, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + (__be32 *) qaddr, kvm_eq.qshift, true); if (rc) { pr_err("Failed to configure queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); @@ -1081,7 +1095,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) dev->private = xive; xive->dev = dev; xive->kvm = kvm; - kvm->arch.xive = xive; mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); @@ -1102,6 +1115,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 9208c82ed08d..88f62cf2dbb9 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 inst; - int ra, rs, rt; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; struct instruction_op op; @@ -85,10 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (emulated != EMULATE_DONE) return emulated; - ra = get_ra(inst); - rs = get_rs(inst); - rt = get_rt(inst); - /* * if mmio_vsx_tx_sx_enabled == 0, copy data between * VSR[0..31] and memory diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..1ddb26394e8a 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 507b18b1660e..169872bc0892 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -17,7 +17,7 @@ PPC64_CACHES: .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -58,7 +58,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -181,4 +181,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 8d68f03bf5a4..55c0208e7bfd 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -187,6 +187,7 @@ void mmu_mark_initmem_nx(void) int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; if (cpu_has_feature(CPU_FTR_601)) @@ -201,9 +202,10 @@ void mmu_mark_initmem_nx(void) size = block_size(base, top); size = max(size, 128UL << 10); if ((top - base) > size) { - if (strict_kernel_rwx_enabled()) - pr_warn("Kernel _etext not properly aligned\n"); size <<= 1; + if (strict_kernel_rwx_enabled() && base + size > border) + pr_warn("Some RW data is getting mapped X. " + "Adjust CONFIG_DATA_SHIFT to avoid that.\n"); } setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 64340fc677bb..1f46f7180363 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -297,10 +297,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); - + if (ret == -1) { + /* Try to remove a non bolted entry */ + ret = mmu_hash_ops.hpte_remove(hpteg); + if (ret != -1) + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); + } if (ret < 0) break; + cond_resched(); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 01bc9663360d..c7e3cc805cef 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -354,7 +354,6 @@ static inline void pgtable_free(void *table, int index) } } -#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -371,12 +370,6 @@ void __tlb_remove_table(void *_table) return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index ae7fca40e5b3..432fd9fa8c3f 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -83,13 +83,17 @@ static int pkey_initialize(void) scan_pkey_feature(); /* - * Let's assume 32 pkeys on P8 bare metal, if its not defined by device - * tree. We make this exception since skiboot forgot to expose this - * property on power8. + * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device + * tree. We make this exception since some version of skiboot forgot to + * expose this property on power8/9. */ - if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && - cpu_has_feature(CPU_FTRS_POWER8)) - pkeys_total = 32; + if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR)) { + unsigned long pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + pkeys_total = 32; + } /* * Adjust the upper limit, based on the number of bits supported by @@ -367,12 +371,14 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) return true; pkey_shift = pkeyshift(pkey); - if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) - return true; + if (execute) + return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + + amr = read_amr(); + if (write) + return !(amr & (AMR_WR_BIT << pkey_shift)); - amr = read_amr(); /* Delay reading amr until absolutely needed */ - return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || - (write && !(amr & (AMR_WR_BIT << pkey_shift)))); + return !(amr & (AMR_RD_BIT << pkey_shift)); } bool arch_pte_access_permitted(u64 pte, bool write, bool execute) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ec6b7ad70659..74b3a03a578e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -254,7 +254,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, is_write)) + if (bad_kuap_fault(regs, address, is_write)) return true; // What's left? Kernel fault on user in well defined regions (extable @@ -262,6 +262,9 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } +// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE +#define SIGFRAME_MAX_SIZE (4096 + 128) + static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma, unsigned int flags, bool *must_retry) @@ -269,7 +272,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, /* * N.B. The POWER/Open ABI allows programs to access up to * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB + * The kernel signal delivery code writes a bit over 4KB * below the stack pointer (r1) before decrementing it. * The exec code can write slightly over 640kB to the stack * before setting the user r1. Thus we allow the stack to @@ -294,7 +297,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, * between the last mapped region and the stack will * expand the stack rather than segfaulting. */ - if (address + 2048 >= uregs->gpr[1]) + if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1]) return false; if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && @@ -375,6 +378,9 @@ static void sanity_check_fault(bool is_write, bool is_user, * Userspace trying to access kernel address, we get PROTFAULT for that. */ if (is_user && address >= TASK_SIZE) { + if ((long)address == -1) + return; + pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n", current->comm, current->pid, address, from_kuid(&init_user_ns, current_uid())); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1de0f43a68e5..e8157325e089 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,20 +53,29 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); + new = NULL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = PGT_CACHE(PTE_INDEX_SIZE); + cachep = NULL; num_hugepd = 1; + new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; + new = NULL; } - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + if (!cachep && !new) { + WARN_ONCE(1, "No page table cache created for hugetlb tables"); + return -ENOMEM; + } + + if (cachep) + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); - if (! new) + if (!new) return -ENOMEM; /* @@ -92,7 +101,10 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - kmem_cache_free(cachep, new); + if (cachep) + kmem_cache_free(cachep, new); + else + pte_free(mm, new); } else { kmemleak_ignore(new); } @@ -319,8 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, - get_hugepd_cache_index(PTE_INDEX_SIZE)); + pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); @@ -519,13 +530,6 @@ retry: return page; } -static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, - unsigned long sz) -{ - unsigned long __boundary = (addr + sz) & ~(sz-1); - return (__boundary - 1 < end - 1) ? __boundary : end; -} - #ifdef CONFIG_PPC_MM_SLICES unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -596,6 +600,7 @@ __setup("hugepagesz=", hugepage_setup_sz); static int __init hugetlbpage_init(void) { + bool configured = false; int psize; if (hugetlb_disabled) { @@ -640,16 +645,22 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_cache_add(PTE_INDEX_SIZE); - else if (pdshift > shift) - pgtable_cache_add(pdshift - shift); - else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) + if (pdshift > shift) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(pdshift - shift); + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || + IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); + } + + configured = true; } - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); + if (configured) { + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_default(); + } else + pr_info("Failed to initialize. Disabling HugeTLB"); return 0; } @@ -673,68 +684,3 @@ void flush_dcache_icache_hugepage(struct page *page) } } } - -static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long pte_end; - struct page *head, *page; - pte_t pte; - int refs; - - pte_end = (addr + sz) & ~(sz-1); - if (pte_end < end) - end = pte_end; - - pte = READ_ONCE(*ptep); - - if (!pte_access_permitted(pte, write)) - return 0; - - /* hugepages are never "special" */ - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - /* Could be optimized better */ - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, - unsigned long end, int write, struct page **pages, int *nr) -{ - pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(hugepd); - unsigned long next; - - ptep = hugepte_offset(hugepd, addr, pdshift); - do { - next = hugepte_addr_end(addr, end, sz); - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) - return 0; - } while (ptep++, addr = next, addr != end); - - return 1; -} diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0e6ed4413eea..1cfe57b51d7e 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -117,7 +117,7 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2eda1ec36f55..99fcdf73f207 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -270,6 +270,14 @@ void __init mem_init(void) BUILD_BUG_ON(MMU_PAGE_COUNT > 16); #ifdef CONFIG_SWIOTLB + /* + * Some platforms (e.g. 85xx) limit DMA-able memory way below + * 4G. We force memblock to bottom-up mode to ensure that the + * memory allocated in swiotlb_init() is DMA-able. + * As it's the last memblock allocation, no need to reset it + * back to to-down. + */ + memblock_set_bottom_up(true); swiotlb_init(0); #endif diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 2ca407cedbe7..eaeee402f96e 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -397,7 +397,7 @@ _GLOBAL(set_context) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 + * Must preserve r7, r8, r9, r10 and r11 */ _GLOBAL(loadcam_entry) mflr r5 @@ -433,6 +433,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) */ _GLOBAL(loadcam_multi) mflr r8 + /* Don't switch to AS=1 if already there */ + mfmsr r11 + andi. r11,r11,MSR_IS + bne 10f /* * Set up temporary TLB entry that is the same as what we're @@ -458,6 +462,7 @@ _GLOBAL(loadcam_multi) mtmsr r6 isync +10: mr r9,r3 add r10,r3,r4 2: bl loadcam_entry @@ -466,6 +471,10 @@ _GLOBAL(loadcam_multi) mr r3,r9 blt 2b + /* Don't return to AS=0 if we were in AS=1 at function start */ + andi. r11,r11,MSR_IS + bne 3f + /* Return to AS=0 and clear the temporary entry */ mfmsr r6 rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) @@ -481,6 +490,7 @@ _GLOBAL(loadcam_multi) tlbwe isync +3: mtlr r8 blr #endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 35cb96cfc258..5159445a21dd 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -360,7 +360,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext + 1)) + if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); @@ -372,8 +372,9 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; - if (v_block_mapped((unsigned long)_sinittext)) { + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); + ptdump_check_wx(); return; } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index b69912854dc3..485363358395 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -62,6 +62,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -163,9 +164,9 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; @@ -181,10 +182,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { + pte_t pte = __pte(st->current_flags); + if (!st->check_wx) return; - if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + if (!pte_write(pte) || !pte_exec(pte)) return; WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", @@ -194,7 +197,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -206,6 +209,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -217,7 +221,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -245,6 +249,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; st->current_flags = flag; st->level = level; } else { @@ -260,7 +265,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } @@ -277,7 +282,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } @@ -293,7 +298,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -314,7 +319,7 @@ static void walk_pagetables(struct pg_state *st) /* pgd exists */ walk_pud(st, pgd, addr); else - note_page(st, addr, 1, pgd_val(*pgd)); + note_page(st, addr, 1, pgd_val(*pgd), PGDIR_SIZE); } } @@ -373,7 +378,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f7ed2f187cb0..784f8df17f73 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -31,6 +31,11 @@ static const struct flag_info flag_array[] = { .set = "present", .clear = " ", }, { + .mask = _PAGE_COHERENT, + .val = _PAGE_COHERENT, + .set = "coherent", + .clear = " ", + }, { .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, .set = "guarded", diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 42bbcd47cc85..dffe1a45b6ed 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) { #endif -static inline bool slice_addr_is_low(unsigned long addr) +static inline notrace bool slice_addr_is_low(unsigned long addr) { u64 tmp = (u64)addr; @@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, mm_ctx_user_psize(¤t->mm->context), 1); } -unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; int index, mask_index; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index c2ee6041f02c..4d09d0a6e899 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1112,6 +1112,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + + /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index faad5b315f49..5834ac3dd474 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1400,16 +1400,6 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); - /* - * Clear the event count so we can compute the _change_ - * in the 24x7 raw counter value at the end of the txn. - * - * Note that we could alternatively read the 24x7 value - * now and save its value in event->hw.prev_count. But - * that would require issuing a hcall, which would then - * defeat the purpose of using the txn interface. - */ - local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index e6e2adcc7b64..c13d64c3b019 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1242,7 +1242,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) if (mbase == NULL) { printk(KERN_ERR "%pOF: Can't map internal config space !", port->node); - goto done; + return; } while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA) @@ -1252,9 +1252,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) } if (attempt) port->link = 1; -done: iounmap(mbase); - } static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 2794235e9d3e..98d824563b35 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -379,7 +379,7 @@ config PPC_KUAP config PPC_KUAP_DEBUG bool "Extra debugging for Kernel Userspace Access Protection" - depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32) + depends on PPC_KUAP && (PPC_RADIX_MMU || PPC32) help Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index d40253a18b1c..512c87bc20a3 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1978,8 +1978,9 @@ static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { - int ret; struct spu_context *ctx = file->private_data; + u32 stat, data; + int ret; if (!access_ok(buf, len)) return -EFAULT; @@ -1988,11 +1989,16 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_mbox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.prob.pu_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the mbox */ + if (!(stat & 0x0000ff)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_mbox_info_fops = { @@ -2019,6 +2025,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + u32 stat, data; int ret; if (!access_ok(buf, len)) @@ -2028,11 +2035,16 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_ibox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.priv2.puint_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the ibox */ + if (!(stat & 0xff0000)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_ibox_info_fops = { @@ -2041,6 +2053,11 @@ static const struct file_operations spufs_ibox_info_fops = { .llseek = generic_file_llseek, }; +static size_t spufs_wbox_info_cnt(struct spu_context *ctx) +{ + return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32); +} + static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { @@ -2049,7 +2066,7 @@ static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, u32 wbox_stat; wbox_stat = ctx->csa.prob.mb_stat_R; - cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); + cnt = spufs_wbox_info_cnt(ctx); for (i = 0; i < cnt; i++) { data[i] = ctx->csa.spu_mailbox_data[i]; } @@ -2062,7 +2079,8 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - int ret; + u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)]; + int ret, count; if (!access_ok(buf, len)) return -EFAULT; @@ -2071,11 +2089,13 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_wbox_info_read(ctx, buf, len, pos); + count = spufs_wbox_info_cnt(ctx); + memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data)); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &data, + count * sizeof(u32)); } static const struct file_operations spufs_wbox_info_fops = { @@ -2084,27 +2104,33 @@ static const struct file_operations spufs_wbox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_dma_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static void spufs_get_dma_info(struct spu_context *ctx, + struct spu_dma_info *info) { - struct spu_dma_info info; - struct mfc_cq_sr *qp, *spuqp; int i; - info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; - info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; - info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; - info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; - info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info->dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; for (i = 0; i < 16; i++) { - qp = &info.dma_info_command_data[i]; - spuqp = &ctx->csa.priv2.spuq[i]; + struct mfc_cq_sr *qp = &info->dma_info_command_data[i]; + struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i]; qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; } +} + +static ssize_t __spufs_dma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_dma_info info; + + spufs_get_dma_info(ctx, &info); return simple_read_from_buffer(buf, len, pos, &info, sizeof info); @@ -2114,6 +2140,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_dma_info info; int ret; if (!access_ok(buf, len)) @@ -2123,11 +2150,12 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_dma_info_read(ctx, buf, len, pos); + spufs_get_dma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_dma_info_fops = { @@ -2136,13 +2164,31 @@ static const struct file_operations spufs_dma_info_fops = { .llseek = no_llseek, }; +static void spufs_get_proxydma_info(struct spu_context *ctx, + struct spu_proxydma_info *info) +{ + int i; + + info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + + for (i = 0; i < 8; i++) { + struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i]; + struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } +} + static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { struct spu_proxydma_info info; - struct mfc_cq_sr *qp, *puqp; int ret = sizeof info; - int i; if (len < ret) return -EINVAL; @@ -2150,18 +2196,7 @@ static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, if (!access_ok(buf, len)) return -EFAULT; - info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; - info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; - info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; - for (i = 0; i < 8; i++) { - qp = &info.proxydma_info_command_data[i]; - puqp = &ctx->csa.priv2.puq[i]; - - qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; - qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; - qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; - qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; - } + spufs_get_proxydma_info(ctx, &info); return simple_read_from_buffer(buf, len, pos, &info, sizeof info); @@ -2171,17 +2206,19 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_proxydma_info info; int ret; ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spufs_get_proxydma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_proxydma_info_fops = { diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 9cd6f3e1000b..09a0594350b6 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -294,23 +294,6 @@ static int __init maple_probe(void) return 1; } -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = maple_progress, - .power_save = power4_idle, -}; - #ifdef CONFIG_EDAC /* * Register a platform device for CPC925 memory controller on @@ -367,3 +350,20 @@ static int __init maple_cpc925_edac_setup(void) } machine_device_initcall(maple, maple_cpc925_edac_setup); #endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = maple_progress, + .power_save = power4_idle, +}; diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index fd14a6237954..28330c150d4f 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1155,10 +1155,10 @@ static void __init pnv_power9_idle_init(void) pnv_deepest_stop_psscr_mask); } - pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n", + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", pnv_first_spr_loss_level); - pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n", + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", pnv_first_tb_loss_level); } diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index e04b20625cb9..000b350d4060 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -59,10 +59,6 @@ static void export_imc_mode_and_cmd(struct device_node *node, imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); - /* - * Return here, either because 'imc' directory already exists, - * Or failed to create a new one. - */ if (!imc_debugfs_parent) return; @@ -135,7 +131,6 @@ static int imc_get_mem_addr_nest(struct device_node *node, } pmu_ptr->imc_counter_mmaped = true; - export_imc_mode_and_cmd(node, pmu_ptr); kfree(base_addr_arr); kfree(chipid_arr); return 0; @@ -151,7 +146,7 @@ error: * and domain as the inputs. * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets */ -static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) +static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain) { int ret = 0; struct imc_pmu *pmu_ptr; @@ -159,27 +154,23 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) /* Return for unknown domain */ if (domain < 0) - return -EINVAL; + return NULL; /* memory for pmu */ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); if (!pmu_ptr) - return -ENOMEM; + return NULL; /* Set the domain */ pmu_ptr->domain = domain; ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size); - if (ret) { - ret = -EINVAL; + if (ret) goto free_pmu; - } if (!of_property_read_u32(parent, "offset", &offset)) { - if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) { - ret = -EINVAL; + if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) goto free_pmu; - } } /* Function to register IMC pmu */ @@ -190,14 +181,14 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) if (pmu_ptr->domain == IMC_DOMAIN_NEST) kfree(pmu_ptr->mem_info); kfree(pmu_ptr); - return ret; + return NULL; } - return 0; + return pmu_ptr; free_pmu: kfree(pmu_ptr); - return ret; + return NULL; } static void disable_nest_pmu_counters(void) @@ -254,6 +245,7 @@ int get_max_nest_dev(void) static int opal_imc_counters_probe(struct platform_device *pdev) { struct device_node *imc_dev = pdev->dev.of_node; + struct imc_pmu *pmu; int pmu_count = 0, domain; bool core_imc_reg = false, thread_imc_reg = false; u32 type; @@ -269,6 +261,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) { + pmu = NULL; if (of_property_read_u32(imc_dev, "type", &type)) { pr_warn("IMC Device without type property\n"); continue; @@ -285,7 +278,14 @@ static int opal_imc_counters_probe(struct platform_device *pdev) domain = IMC_DOMAIN_THREAD; break; case IMC_TYPE_TRACE: - domain = IMC_DOMAIN_TRACE; + /* + * FIXME. Using trace_imc events to monitor application + * or KVM thread performance can cause a checkstop + * (system crash). + * Disable it for now. + */ + pr_info_once("IMC: disabling trace_imc PMU\n"); + domain = -1; break; default: pr_warn("IMC Unknown Device type \n"); @@ -293,9 +293,13 @@ static int opal_imc_counters_probe(struct platform_device *pdev) break; } - if (!imc_pmu_create(imc_dev, pmu_count, domain)) { - if (domain == IMC_DOMAIN_NEST) + pmu = imc_pmu_create(imc_dev, pmu_count, domain); + if (pmu != NULL) { + if (domain == IMC_DOMAIN_NEST) { + if (!imc_debugfs_parent) + export_imc_mode_and_cmd(imc_dev, pmu); pmu_count++; + } if (domain == IMC_DOMAIN_CORE) core_imc_reg = true; if (domain == IMC_DOMAIN_THREAD) @@ -303,10 +307,6 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } } - /* If none of the nest units are registered, remove debugfs interface */ - if (pmu_count == 0) - debugfs_remove_recursive(imc_debugfs_parent); - /* If core imc is not registered, unregister thread-imc */ if (!core_imc_reg && thread_imc_reg) unregister_thread_imc(); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0f72c7484824..050131e886f1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1556,6 +1556,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1568,13 +1572,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1588,6 +1590,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); #ifdef CONFIG_IOMMU_API iommu_register_group(&pe->table_group, @@ -2897,9 +2908,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pci_dev_is_added(pdev)) - return; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; pdn->m64_single_mode = false; @@ -2974,6 +2982,30 @@ truncate_iov: res->end = res->start - 1; } } + +static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * (pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using it's M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} #endif /* CONFIG_PCI_IOV */ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, @@ -3870,7 +3902,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, ppc_md.pcibios_default_alignment = pnv_pci_default_alignment; #ifdef CONFIG_PCI_IOV - ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; + ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index ff1a33fee8e6..2bfb72d057a7 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -814,24 +814,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; -#ifdef CONFIG_PCI_IOV - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - - /* Fix the VF pdn PE number */ - if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - if (pe->rid == ((pdev->bus->number << 8) | - (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; - pe->pdev = pdev; - break; - } - } - } -#endif /* CONFIG_PCI_IOV */ if (phb && phb->dma_dev_setup) phb->dma_dev_setup(phb, pdev); @@ -1089,6 +1071,23 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; +#ifdef CONFIG_PCIEPORTBUS + /* + * On PowerNV PCIe devices are (currently) managed in cooperation + * with firmware. This isn't *strictly* required, but there's enough + * assumptions baked into both firmware and the platform code that + * it's unwise to allow the portbus services to be used. + * + * We need to fix this eventually, but for now set this flag to disable + * the portbus driver. The AER service isn't required since that AER + * events are handled via EEH. The pciehp hotplug driver can't work + * without kernel changes (and portbus binding breaks pnv_php). The + * other services also require some thinking about how we're going + * to integrate them. + */ + pcie_ports_disabled = true; +#endif + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 94cd96b9b7bb..d87981f90fdc 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -146,23 +146,27 @@ static int pnv_smp_cpu_disable(void) return 0; } +static void pnv_flush_interrupts(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (xive_enabled()) + xive_flush_interrupt(); + else + icp_opal_flush_interrupt(); + } else { + icp_native_flush_interrupt(); + } +} + static void pnv_smp_cpu_kill_self(void) { + unsigned long srr1, unexpected_mask, wmask; unsigned int cpu; - unsigned long srr1, wmask; u64 lpcr_val; /* Standard hot unplug procedure */ - /* - * This hard disables local interurpts, ensuring we have no lazy - * irqs pending. - */ - WARN_ON(irqs_disabled()); - hard_irq_disable(); - WARN_ON(lazy_irq_pending()); idle_task_exit(); - current->active_mm = NULL; /* for sanity */ cpu = smp_processor_id(); DBG("CPU%d offline\n", cpu); generic_set_cpu_dead(cpu); @@ -173,6 +177,27 @@ static void pnv_smp_cpu_kill_self(void) wmask = SRR1_WAKEMASK_P8; /* + * This turns the irq soft-disabled state we're called with, into a + * hard-disabled state with pending irq_happened interrupts cleared. + * + * PACA_IRQ_DEC - Decrementer should be ignored. + * PACA_IRQ_HMI - Can be ignored, processing is done in real mode. + * PACA_IRQ_DBELL, EE, PMI - Unexpected. + */ + hard_irq_disable(); + if (generic_check_cpu_restart(cpu)) + goto out; + + unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS); + if (local_paca->irq_happened & unexpected_mask) { + if (local_paca->irq_happened & PACA_IRQ_EE) + pnv_flush_interrupts(); + DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n", + cpu, local_paca->irq_happened); + } + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + + /* * We don't want to take decrementer interrupts while we are * offline, so clear LPCR:PECE1. We keep PECE2 (and * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in. @@ -197,6 +222,7 @@ static void pnv_smp_cpu_kill_self(void) srr1 = pnv_cpu_offline(cpu); + WARN_ON_ONCE(!irqs_disabled()); WARN_ON(lazy_irq_pending()); /* @@ -212,13 +238,7 @@ static void pnv_smp_cpu_kill_self(void) */ if (((srr1 & wmask) == SRR1_WAKEEE) || ((srr1 & wmask) == SRR1_WAKEHVI)) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (xive_enabled()) - xive_flush_interrupt(); - else - icp_opal_flush_interrupt(); - } else - icp_native_flush_interrupt(); + pnv_flush_interrupts(); } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); @@ -266,7 +286,7 @@ static void pnv_smp_cpu_kill_self(void) */ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); - +out: DBG("CPU%d coming online...\n", cpu); } diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 423be34f0f5f..f42fe4e86ce5 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -200,13 +200,14 @@ void ps3_mm_vas_destroy(void) { int result; - DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); - if (map.vas_id) { result = lv1_select_virtual_address_space(0); - BUG_ON(result); - result = lv1_destruct_virtual_address_space(map.vas_id); - BUG_ON(result); + result += lv1_destruct_virtual_address_space(map.vas_id); + + if (result) { + lv1_panic(0); + } + map.vas_id = 0; } } @@ -304,19 +305,20 @@ static void ps3_mm_region_destroy(struct mem_region *r) int result; if (!r->destroy) { - pr_info("%s:%d: Not destroying high region: %llxh %llxh\n", - __func__, __LINE__, r->base, r->size); return; } - DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); - if (r->base) { result = lv1_release_memory(r->base); - BUG_ON(result); + + if (result) { + lv1_panic(0); + } + r->size = r->base = r->offset = 0; map.total = map.rm.size; } + ps3_mm_set_repository_highmem(NULL); } diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index b33251d75927..572651a5c87b 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -411,6 +411,10 @@ static struct bus_type cmm_subsys = { .dev_name = "cmm", }; +static void cmm_release_device(struct device *dev) +{ +} + /** * cmm_sysfs_register - Register with sysfs * @@ -426,6 +430,7 @@ static int cmm_sysfs_register(struct device *dev) dev->id = 0; dev->bus = &cmm_subsys; + dev->release = cmm_release_device; if ((rc = device_register(dev))) goto subsys_unregister; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 46d0d35b9ca4..c5de633e8436 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -27,7 +27,7 @@ static bool rtas_hp_event; unsigned long pseries_memory_block_size(void) { struct device_node *np; - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; struct resource r; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -223,7 +223,7 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, struct drmem_lmb **end_lmb) { struct drmem_lmb *lmb, *start, *end; - struct drmem_lmb *last_lmb; + struct drmem_lmb *limit; start = NULL; for_each_drmem_lmb(lmb) { @@ -236,10 +236,10 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, if (!start) return -EINVAL; - end = &start[n_lmbs - 1]; + end = &start[n_lmbs]; - last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; - if (end > last_lmb) + limit = &drmem_info->lmbs[drmem_info->n_lmbs]; + if (end > limit) return -EINVAL; *start_lmb = start; @@ -360,8 +360,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 889dc2e44b89..3eddad5f9be0 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -132,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, long npages, unsigned long uaddr, enum dma_data_direction direction, unsigned long attrs) @@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(tbl, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, (npages_start - (npages + 1))); break; } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); dump_stack(); @@ -193,7 +193,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, unsigned long flags; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages, uaddr, direction, attrs); } @@ -209,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, + npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } @@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); dump_stack(); } @@ -285,7 +287,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_MULTITCE)) - return tce_free_pSeriesLP(tbl, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); @@ -400,6 +402,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, u64 rc = 0; long l, limit; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + local_irq_disable(); /* to protect tcep and the page behind it */ tcep = __this_cpu_read(tce_page); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 15fe7213aa6b..aa7cc6f2a125 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1255,7 +1255,7 @@ static int __init vpa_debugfs_init(void) { char name[16]; long i; - static struct dentry *vpa_dir; + struct dentry *vpa_dir; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e33e8bc4b69b..38c306551f76 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -435,10 +435,10 @@ static void maxmem_data(struct seq_file *m) { unsigned long maxmem = 0; - maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size; maxmem += hugetlb_total_pages() * PAGE_SIZE; - seq_printf(m, "MaxMem=%ld\n", maxmem); + seq_printf(m, "MaxMem=%lu\n", maxmem); } static int pseries_lparcfg_data(struct seq_file *m, void *v) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 3c17fc7c2b93..8debe017c0d6 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -238,6 +238,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus = nvdimm_bus_register(NULL, &p->bus_desc); if (!p->bus) { dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); + kfree(p->bus_desc.provider_name); return -ENXIO; } @@ -382,6 +383,7 @@ static int papr_scm_remove(struct platform_device *pdev) nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); + kfree(p->bus_desc.provider_name); kfree(p); return 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f16fdd0f71f7..68eec766a5d8 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -412,10 +412,11 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. */ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) static inline struct rtas_error_log *fwnmi_get_errlog(void) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index cc682759feae..c0a87b1cee24 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,9 @@ #include "pseries.h" #include "../../../../drivers/pci/pci.h" +DEFINE_STATIC_KEY_FALSE(shared_processor); +EXPORT_SYMBOL_GPL(shared_processor); + int CMO_PrPSP = -1; int CMO_SecPSP = -1; unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); @@ -780,6 +783,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); + + if (lppaca_shared_proc(get_lppaca())) + static_branch_enable(&shared_processor); + ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 0a24a5a185f0..f789693f61f4 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -132,15 +132,11 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - stream_id = simple_strtoul(buf, NULL, 16); do { @@ -150,32 +146,16 @@ static ssize_t store_hibernate(struct device *dev, } while (rc == -EAGAIN); if (!rc) { - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, - cpu_online_mask); - rc = rtas_online_cpus_mask(offline_mask); - if (rc) { - pr_err("%s: Could not bring present CPUs online.\n", - __func__); - goto out; - } - stop_topology_update(); rc = pm_suspend(PM_SUSPEND_MEM); start_topology_update(); - - /* Take down CPUs not online prior to suspend */ - if (!rtas_offline_cpus_mask(offline_mask)) - pr_warn("%s: Could not restore CPUs to offline " - "state.\n", __func__); } stream_id = 0; if (!rc) rc = count; -out: - free_cpumask_var(offline_mask); + return rc; } diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index ba758f4be328..63c8dc080770 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1174,6 +1174,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; + kref_init(&tbl->it_kref); + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index be86fce1a84e..9542394300ec 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/msi.h> +#include <linux/vmalloc.h> #include <asm/prom.h> #include <asm/io.h> @@ -68,13 +69,6 @@ static u32 xive_ipi_irq; /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); -/* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this - */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) - /* An invalid CPU target */ #define XIVE_INVALID_TARGET (-1) @@ -937,12 +931,21 @@ static int xive_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); + u8 pq; switch (which) { case IRQCHIP_STATE_ACTIVE: - *state = !xd->stale_p && - (xd->saved_p || - !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); + pq = xive_esb_read(xd, XIVE_ESB_GET); + + /* + * The esb value being all 1's means we couldn't get + * the PQ state of the interrupt through mmio. It may + * happen, for example when querying a PHB interrupt + * while the PHB is in an error state. We consider the + * interrupt to be inactive in that case. + */ + *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && + (xd->saved_p || !!(pq & XIVE_ESB_VAL_P)); return 0; default: return -EINVAL; @@ -972,12 +975,16 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { if (xd->eoi_mmio) { + unmap_kernel_range((unsigned long)xd->eoi_mmio, + 1u << xd->esb_shift); iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) xd->trig_mmio = NULL; xd->eoi_mmio = NULL; } if (xd->trig_mmio) { + unmap_kernel_range((unsigned long)xd->trig_mmio, + 1u << xd->esb_shift); iounmap(xd->trig_mmio); xd->trig_mmio = NULL; } @@ -1000,6 +1007,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) xd->target = XIVE_INVALID_TARGET; irq_set_handler_data(virq, xd); + /* + * Turn OFF by default the interrupt being mapped. A side + * effect of this check is the mapping the ESB page of the + * interrupt in the Linux address space. This prevents page + * fault issues in the crash handler which masks all + * interrupts. + */ + xive_esb_read(xd, XIVE_ESB_SET_PQ_01); + return 0; } @@ -1097,7 +1113,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; /* Grab an IPI from the backend, this will populate xc->hw_ipi */ @@ -1134,7 +1150,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; /* Mask the IPI */ @@ -1290,6 +1306,7 @@ static int xive_prepare_cpu(unsigned int cpu) if (np) xc->chip_id = of_get_ibm_chip_id(np); of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index ad8ee7dd7f57..43f1f587a956 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -298,7 +298,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -306,7 +306,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(OPAL_BUSY_DELAY_MS); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 8ef9cf4ebb1c..ce756997054f 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -356,20 +356,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->esb_shift = esb_shift; data->trig_page = trig_page; + data->hw_irq = hw_irq; + /* * No chip-id for the sPAPR backend. This has an impact how we * pick a target. See xive_pick_irq_target(). */ data->src_chip = XIVE_INVALID_CHIP_ID; + /* + * When the H_INT_ESB flag is set, the H_INT_ESB hcall should + * be used for interrupt management. Skip the remapping of the + * ESB pages which are not available. + */ + if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB) + return 0; + data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift); if (!data->eoi_mmio) { pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq); return -ENOMEM; } - data->hw_irq = hw_irq; - /* Full function page supports trigger */ if (flags & XIVE_SRC_TRIGGER) { data->trig_mmio = data->eoi_mmio; @@ -498,11 +506,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 211725dbf364..a2f8d9361e6e 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef CONFIG_SMP diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 2b4e959caa36..014e00e74d2b 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -10,24 +10,29 @@ # based on relocs_check.pl # Copyright © 2009 IBM Corporation -if [ $# -lt 2 ]; then - echo "$0 [path to objdump] [path to vmlinux]" 1>&2 +if [ $# -lt 3 ]; then + echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2 exit 1 fi -# Have Kbuild supply the path to objdump so we handle cross compilation. +# Have Kbuild supply the path to objdump and nm so we handle cross compilation. objdump="$1" -vmlinux="$2" +nm="$2" +vmlinux="$3" + +# Remove from the bad relocations those that match an undefined weak symbol +# which will result in an absolute relocation to 0. +# Weak unresolved symbols are of that form in nm output: +# " w _binary__btf_vmlinux_bin_end" +undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }') bad_relocs=$( -"$objdump" -R "$vmlinux" | +$objdump -R "$vmlinux" | # Only look at relocation lines. grep -E '\<R_' | # These relocations are okay # On PPC64: # R_PPC64_RELATIVE, R_PPC64_NONE - # R_PPC64_ADDR64 mach_<name> - # R_PPC64_ADDR64 __crc_<name> # On PPC: # R_PPC_RELATIVE, R_PPC_ADDR16_HI, # R_PPC_ADDR16_HA,R_PPC_ADDR16_LO, @@ -39,8 +44,7 @@ R_PPC_ADDR16_HI R_PPC_ADDR16_HA R_PPC_RELATIVE R_PPC_NONE' | - grep -E -v '\<R_PPC64_ADDR64[[:space:]]+mach_' | - grep -E -v '\<R_PPC64_ADDR64[[:space:]]+__crc_' + ([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat) ) if [ -z "$bad_relocs" ]; then diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 1e972df3107e..77114755dc6f 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -18,14 +18,14 @@ vmlinux="$2" #__end_interrupts should be located within the first 64K end_intr=0x$( -"$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000 \ +$objdump -R "$vmlinux" -d --start-address=0xc000000000000000 \ --stop-address=0xc000000000010000 | grep '\<__end_interrupts>:' | awk '{print $1}' ) BRANCHES=$( -"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \ +$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ --stop-address=${end_intr} | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index f142570ad860..6f9cccea54f3 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Disable clang warning for using setjmp without setjmp.h header -subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) - GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e15ccf19c153..79ac87f4d87b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1894,15 +1894,14 @@ static void dump_300_sprs(void) printf("pidr = %.16lx tidr = %.16lx\n", mfspr(SPRN_PID), mfspr(SPRN_TIDR)); - printf("asdr = %.16lx psscr = %.16lx\n", - mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR) - : mfspr(SPRN_PSSCR_PR)); + printf("psscr = %.16lx\n", + hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); if (!hv) return; - printf("ptcr = %.16lx\n", - mfspr(SPRN_PTCR)); + printf("ptcr = %.16lx asdr = %.16lx\n", + mfspr(SPRN_PTCR), mfspr(SPRN_ASDR)); #endif } diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 3f1737f301cc..d0e24aaa2aa0 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -58,8 +58,16 @@ do { \ * The AQ/RL pair provides a RCpc critical section, but there's not really any * way we can take advantage of that here because the ordering is only enforced * on that one lock. Thus, we're just doing a full fence. + * + * Since we allow writeX to be called from preemptive regions we need at least + * an "o" in the predecessor set to ensure device writes are visible before the + * task is marked as available for scheduling on a new hart. While I don't see + * any concrete reason we need a full IO fence, it seems safer to just upgrade + * this in order to avoid any IO crossing a scheduling boundary. In both + * instances the scheduler pairs this with an mb(), so nothing is necessary on + * the new hart. */ -#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw) +#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw) #include <asm-generic/barrier.h> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index d969bab4a26b..262e5bbb2776 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -179,7 +179,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -224,7 +224,7 @@ RISCV_ACQUIRE_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -270,7 +270,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -316,7 +316,7 @@ " fence rw, rw\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 905372d7eeb8..3df9a82bdbfd 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -12,7 +12,11 @@ #include <linux/const.h> /* thread information allocation */ +#ifdef CONFIG_64BIT +#define THREAD_SIZE_ORDER (2) +#else #define THREAD_SIZE_ORDER (1) +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b94d8db5ddcc..c40fdcdeb950 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - if (function_graph_enter(old, self_addr, frame_pointer, parent)) + if (!function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = return_hooker; } diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 70bb94ae61c5..6bf5b1674384 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -8,6 +8,10 @@ #include <linux/err.h> #include <linux/errno.h> #include <linux/moduleloader.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> +#include <asm/pgtable.h> +#include <asm/sections.h> static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -386,3 +390,15 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +#define VMALLOC_MODULE_START \ + max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START) +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START, + VMALLOC_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index f15642715d1a..0a6659865b21 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -63,7 +63,7 @@ static void notrace walk_stackframe(struct task_struct *task, #else /* !CONFIG_FRAME_POINTER */ -static void notrace walk_stackframe(struct task_struct *task, +void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) { unsigned long sp, pc; diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f3619f59d85c..12f8a7fce78b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,7 @@ #include <linux/syscalls.h> #include <asm/unistd.h> #include <asm/cacheflush.h> +#include <asm-generic/mman-common.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,11 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + + if ((prot & PROT_WRITE) && (prot & PROT_EXEC)) + if (unlikely(!(prot & PROT_READ))) + return -EINVAL; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 6b32190ba73c..6dc20fa5f46b 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -123,24 +123,24 @@ static inline unsigned long get_break_insn_length(unsigned long pc) asmlinkage void do_trap_break(struct pt_regs *regs) { -#ifdef CONFIG_GENERIC_BUG if (!user_mode(regs)) { enum bug_trap_type type; type = report_bug(regs->sepc, regs); switch (type) { - case BUG_TRAP_TYPE_NONE: - break; +#ifdef CONFIG_GENERIC_BUG case BUG_TRAP_TYPE_WARN: regs->sepc += get_break_insn_length(regs->sepc); - break; + return; case BUG_TRAP_TYPE_BUG: +#endif /* CONFIG_GENERIC_BUG */ + default: die(regs, "Kernel BUG"); } + } else { + force_sig_fault(SIGTRAP, TRAP_BRKPT, + (void __user *)(regs->sepc), current); } -#endif /* CONFIG_GENERIC_BUG */ - - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current); } #ifdef CONFIG_GENERIC_BUG diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 49a5852fd07d..a4ee3a0e7d20 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -33,15 +33,15 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) # We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. +# table and layout of the linked DSO. With ld --just-symbols we can then +# refer to these symbols in the kernel code rather than hand-coded addresses. SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ -Wl,--build-id -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold) -LDFLAGS_vdso-syms.o := -r -R +LDFLAGS_vdso-syms.o := -r --just-symbols $(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE $(call if_changed,ld) @@ -58,7 +58,8 @@ quiet_cmd_vdsold = VDSOLD $@ cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \ $(CROSS_COMPILE)objcopy \ - $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + rm $@.tmp # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 9ebcff8ba263..4a2e5bf8b21a 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -14,6 +14,7 @@ void flush_icache_all(void) { sbi_remote_fence_i(NULL); } +EXPORT_SYMBOL(flush_icache_all); /* * Performs an icache flush for the given MM context. RISC-V has no direct diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 84747d7a1e85..67d7fb8924ff 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -120,8 +120,9 @@ void __init setup_bootmem(void) } BUG_ON(mem_size == 0); - set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = max_pfn; + set_max_mapnr(max_low_pfn); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 426d5c33ea90..18d5ca64d2d3 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx) return false; } +static void mark_fp(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); +} + static void mark_call(struct rv_jit_context *ctx) { __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); @@ -596,7 +601,8 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); /* Set return value. */ - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + if (reg == RV_REG_RA) + emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); } @@ -631,14 +637,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) return -1; emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx); - /* if (--TCC < 0) + /* if (TCC-- < 0) * goto out; */ emit(rv_addi(RV_REG_T1, tcc, -1), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; if (is_13b_check(off, insn)) return -1; - emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx); + emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx); /* prog = array->ptrs[index]; * if (!prog) @@ -1414,6 +1420,10 @@ static void build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + if (bpf_stack_adjust) + mark_fp(ctx); + if (seen_reg(RV_REG_RA, ctx)) stack_adjust += 8; stack_adjust += 8; /* RV_REG_FP */ @@ -1431,7 +1441,6 @@ static void build_prologue(struct rv_jit_context *ctx) stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); stack_adjust += bpf_stack_adjust; store_offset = stack_adjust - 8; diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e48013cf50a2..1d53ea0a4e14 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -69,7 +69,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifeq ($(call cc-option-yn,-mpacked-stack),y) +ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif @@ -146,7 +146,7 @@ all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage -install: vmlinux +install: $(Q)$(MAKE) $(build)=$(boot) $@ bzImage: vmlinux diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 7cba96e7587b..98e808b4e6ec 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -70,7 +70,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) -install: $(CONFIGURE) $(obj)/bzImage +install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 3bdd8132e56b..ae3e22355b9f 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -73,7 +73,7 @@ static unsigned long get_random(unsigned long limit) *(unsigned long *) prng.parm_block ^= seed; for (i = 0; i < 16; i++) { cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, - (char *) entropy, (char *) entropy, + (u8 *) entropy, (u8 *) entropy, sizeof(entropy)); memcpy(prng.parm_block, entropy, sizeof(entropy)); } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index ceeacbeff600..437ddec24723 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -164,6 +164,11 @@ void startup_kernel(void) handle_relocs(__kaslr_offset); if (__kaslr_offset) { + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; /* Clear non-relocated kernel */ if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) memset(img, 0, vmlinux.image_size); diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index ed007f4a6444..3f501159ee9f 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -15,7 +15,8 @@ void uv_query_info(void) if (!test_facility(158)) return; - if (uv_call(0, (uint64_t)&uvcb)) + /* rc==0x100 means that there is additional data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) return; if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2b00a3ebee08..8b5a39e4739a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -30,12 +30,12 @@ #define KVM_USER_MEM_SLOTS 32 /* - * These seem to be used for allocating ->chip in the routing table, - * which we don't use. 4096 is an out-of-thin-air value. If we need - * to look at ->chip later on, we'll need to revisit this. + * These seem to be used for allocating ->chip in the routing table, which we + * don't use. 1 is as small as we can get to reduce the needed memory. If we + * need to look at ->chip later on, we'll need to revisit this. */ #define KVM_NR_IRQCHIPS 1 -#define KVM_IRQCHIP_NUM_PINS 4096 +#define KVM_IRQCHIP_NUM_PINS 1 #define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 237ee0c4169f..612ed3c6d581 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -141,7 +141,9 @@ struct lowcore { /* br %r1 trampoline */ __u16 br_r1_trampoline; /* 0x0400 */ - __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */ + __u32 return_lpswe; /* 0x0402 */ + __u32 return_mcck_lpswe; /* 0x0406 */ + __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2..e399102367af 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -33,6 +33,8 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #include <asm/setup.h> #ifndef __ASSEMBLY__ @@ -40,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd55326..0cca0e2d2a35 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -183,7 +183,7 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); -int clp_rescan_pci_devices_simple(void); +int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index ff81ed19c506..61cf9531f68f 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -143,14 +143,4 @@ static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc) return __zpci_set_irq_ctrl(ctl, isc, &iib); } -#ifdef CONFIG_PCI -static inline void enable_mio_ctl(void) -{ - if (static_branch_likely(&have_mio)) - __ctl_set_bit(2, 5); -} -#else /* CONFIG_PCI */ -static inline void enable_mio_ctl(void) {} -#endif /* CONFIG_PCI */ - #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index cd060b5dd8fd..e4dc64cc9c55 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -8,6 +8,10 @@ #include <linux/slab.h> #include <asm/pci_insn.h> +/* I/O size constraints */ +#define ZPCI_MAX_READ_SIZE 8 +#define ZPCI_MAX_WRITE_SIZE 128 + /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 #define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL @@ -140,7 +144,8 @@ static inline int zpci_memcpy_fromio(void *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, - (u64) dst, n, 8); + (u64) dst, n, + ZPCI_MAX_READ_SIZE); rc = zpci_read_single(dst, src, size); if (rc) break; @@ -161,7 +166,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, - (u64) src, n, 128); + (u64) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = zpci_write_block(dst, src, size); else diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bccb8f4a63e2..77606c4acd58 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION2_ENTRY_EMPTY); return (p4d_t *) table; } -#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + crst_table_free(mm, (unsigned long *) p4d); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { @@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; } -#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!mm_pud_folded(mm)) + crst_table_free(mm, (unsigned long *) pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + if (mm_pmd_folded(mm)) + return; pgtable_pmd_page_dtor(virt_to_page(pmd)); crst_table_free(mm, (unsigned long *) pmd); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3c17771fe43e..cb9ded67c77c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -755,6 +755,12 @@ static inline int pmd_write(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline int pmd_dirty(pmd_t pmd) { int dirty = 1; @@ -1172,8 +1178,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_NX) - pte_val(entry) &= ~_PAGE_NOEXEC; if (pte_present(entry)) pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) @@ -1190,6 +1194,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); + if (!MACHINE_HAS_NX) + pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); } diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0fcbc37b637..2c0b8b5fe19a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -161,6 +161,7 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ .fpu.regs = (void *) init_task.thread.fpu.fprs, \ + .last_break = 1, \ } /* diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index db5ef22c46e4..b265106ca331 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -228,7 +228,7 @@ struct qdio_buffer { * @sbal: absolute SBAL address */ struct sl_element { - unsigned long sbal; + u64 sbal; } __attribute__ ((packed)); /** diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 925889d360c1..eb83a57cb503 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -8,6 +8,7 @@ #include <linux/const.h> #include <uapi/asm/setup.h> +#include <linux/build_bug.h> #define EP_OFFSET 0x10008 #define EP_STRING "S390EP" @@ -153,6 +154,12 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } +static inline u32 gen_lpswe(unsigned long addr) +{ + BUILD_BUG_ON(addr > 0xfff); + return 0xb2b20000 | addr; +} + #else /* __ASSEMBLY__ */ #define IPL_DEVICE (IPL_DEVICE_OFFSET) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index f073292e9fdb..d9d5de0f67ff 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; + unsigned long error = regs->gprs[2]; +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long)(int)error; + } +#endif + return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 64539c221672..b6a4ce9dafaf 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,8 +10,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H -#include <asm/lowcore.h> +#include <linux/preempt.h> #include <linux/time64.h> +#include <asm/lowcore.h> /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -154,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); @@ -186,15 +187,18 @@ extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * - * The caller must ensure that preemption is disabled. * The clock and tod_clock_base get changed via stop_machine. - * Therefore preemption must be disabled when calling this - * function, otherwise the returned value is not guaranteed to - * be monotonic. + * Therefore preemption must be disabled, otherwise the returned + * value is not guaranteed to be monotonic. */ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + unsigned long long tod; + + preempt_disable_notrace(); + tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + preempt_enable_notrace(); + return tod; } /** diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bd2fd9a7821d..a470f1fa9f2a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); __rc; \ }) -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { unsigned long spec = 0x010000UL; int rc; @@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) return rc; } -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { unsigned long spec = 0x01UL; int rc; diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 6eb2ef105d87..ac4568d60d08 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -35,6 +35,7 @@ struct unwind_state { struct task_struct *task; struct pt_regs *regs; unsigned long sp, ip; + bool reuse_sp; int graph_idx; bool reliable; bool error; diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 169d7604eb80..f3ba84fa9bd1 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 41ac4ad21311..a65cb4924bdb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); @@ -87,7 +88,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ @@ -125,6 +125,8 @@ int main(void) OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 0ebf08c3b35e..b46a3c27eb08 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ areas[i] = kmalloc_array(pages_per_area, sizeof(debug_entry_t *), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!areas[i]) goto fail_malloc_areas2; for (j = 0; j < pages_per_area; j++) { diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index e9dac9a24d3f..61f2b0412345 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -84,7 +84,7 @@ static int show_diag_stat(struct seq_file *m, void *v) static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) { - return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; } static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b2c68fbf2634..41925f220694 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -462,10 +462,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -537,7 +538,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -565,7 +566,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 629f173f60cd..9cebf0487ebd 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -30,7 +30,6 @@ #include <asm/sclp.h> #include <asm/facility.h> #include <asm/boot_data.h> -#include <asm/pci_insn.h> #include "entry.h" /* @@ -155,6 +154,8 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + if (IS_ENABLED(CONFIG_KASAN)) + psw.mask |= PSW_MASK_DAT; psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = (unsigned long) s390_base_pgm_handler; @@ -236,7 +237,6 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } - enable_mio_ctl(); } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3f4d272577d3..de7918a6830c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? - jnz 1f + jnz 2f lgr %r14,%r9 + cghi %r14,__LC_RETURN_LPSWE + je 0f slg %r14,BASED(.Lcritical_start) clg %r14,BASED(.Lcritical_length) - jhe 0f + jhe 1f +0: lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical tmhh %r8,0x0001 # retest problem state after cleanup - jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? + jnz 2f +1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 2f + jnz 3f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f -1: UPDATE_VTIME %r14,%r15,\timer + j 4f +2: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -2: lg %r15,__LC_ASYNC_STACK # load async stack -3: la %r11,STACK_FRAME_OVERHEAD(%r15) +3: lg %r15,__LC_ASYNC_STACK # load async stack +4: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -365,9 +368,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -401,7 +404,7 @@ ENTRY(system_call) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lsysc_done: # @@ -608,43 +611,50 @@ ENTRY(pgm_check_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK - lg %r12,__LC_CURRENT + srag %r11,%r10,12 + jnz 0f + /* if __LC_LAST_BREAK is < 4096, it contains one of + * the lpswe addresses in lowcore. Set it to 1 (initial state) + * to prevent leaking that address to userspace. + */ + lghi %r10,1 +0: lg %r12,__LC_CURRENT lghi %r11,0 larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit - jnz 2f # -> fault in user space + jnz 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) - jhe 0f + jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif -0: tmhh %r8,0x4000 # PER bit set in old PSW ? - jnz 1f # -> enabled, can't be a double fault +1: tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3,0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -1: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + # CHECK_VMAP_STACK branches to stack_overflow or 5f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f +3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 aghi %r14,__TASK_thread # pointer to thread_struct lghi %r13,__LC_PGM_TDB tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 3f + jz 4f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: stg %r10,__THREAD_last_break(%r14) -4: lgr %r13,%r11 +4: stg %r10,__THREAD_last_break(%r14) +5: lgr %r13,%r11 la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use @@ -663,14 +673,14 @@ ENTRY(pgm_check_handler) stg %r13,__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 5f + jz 6f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -5: REENABLE_IRQS +6: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -775,7 +785,7 @@ ENTRY(io_int_handler) mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lio_done: # @@ -1216,7 +1226,7 @@ ENTRY(mcck_int_handler) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_MCCK_PSW + b __LC_RETURN_MCCK_LPSWE .Lmcck_panic: lg %r15,__LC_NODAT_STACK @@ -1273,6 +1283,8 @@ ENDPROC(stack_overflow) #endif ENTRY(cleanup_critical) + cghi %r9,__LC_RETURN_LPSWE + je .Lcleanup_lpswe #if IS_ENABLED(CONFIG_KVM) clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap jl 0f @@ -1426,6 +1438,7 @@ ENDPROC(cleanup_critical) mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) +.Lcleanup_lpswe: 1: lmg %r8,%r9,__LC_RETURN_PSW BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index b9d8fe45737a..8f8456816d83 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_time = READ_ONCE(idle->idle_time); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + idle_time += in_idle; return sprintf(buf, "%llu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; + unsigned long long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - - return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + return cputime_to_nsecs(in_idle); } void arch_cpu_idle_enter(void) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8a1ae140c5e2..e65c50a394f4 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -163,7 +163,9 @@ static bool kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP int rc; + preempt_disable(); rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); + preempt_enable(); return rc == 0; #else return false; @@ -253,10 +255,10 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); vmcoreinfo_append_str("SDMA=%lx\n", __sdma); vmcoreinfo_append_str("EDMA=%lx\n", __edma); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); } void machine_shutdown(void) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fbdd3ea73667..bb7ed0d1a351 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -173,7 +173,7 @@ static int kexec_file_add_initrd(struct kimage *image, buf.mem += crashk_res.start; buf.memsz = buf.bufsz; - data->parm->initrd_start = buf.mem; + data->parm->initrd_start = data->memsz; data->parm->initrd_size = buf.memsz; data->memsz += buf.memsz; diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index 99e7951c199c..a77d7f72dd4a 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, break; case R_390_64: /* Direct 64 bit. */ case R_390_GLOB_DAT: + case R_390_JMP_SLOT: *(u64 *)loc = val; break; case R_390_PC16: /* PC relative 16 bit. */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9e1660a6b9db..f942341429b1 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,6 +26,12 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -35,13 +41,21 @@ EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller lgr %r1,%r15 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 48d48b6187c0..0eb1d1cc53a8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static int __hw_perf_event_init(struct perf_event *event) +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; @@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event) int err = 0; u64 ev; - switch (attr->type) { + switch (type) { case PERF_TYPE_RAW: /* Raw events are used to access counters directly, * hence do not permit excludes */ @@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { + unsigned int type = event->attr.type; int err; - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - err = __hw_perf_event_init(event); - break; - default: + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, PERF_TYPE_RAW); + else return -ENOENT; - } if (unlikely(err) && event->destroy) event->destroy(event); @@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void) return -ENODEV; cpumf_pmu.attr_groups = cpumf_cf_event_group(); - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); if (rc) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); return rc; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index d4e031f7b9c8..be66511302ff 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event) int err = -ENOENT; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d config %#llx " + "%s event %p cpu %d config %#llx type:%u " "sample_type %#llx cf_diag_events %d\n", __func__, - event, event->cpu, attr->config, attr->sample_type, - atomic_read(&cf_diag_events)); + event, event->cpu, attr->config, event->pmu->type, + attr->sample_type, atomic_read(&cf_diag_events)); if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || - event->attr.type != PERF_TYPE_RAW) + event->attr.type != event->pmu->type) goto out; /* Raw events are used to access counters directly, @@ -693,7 +693,7 @@ static int __init cf_diag_init(void) } debug_register_view(cf_diag_dbg, &debug_sprintf_view); - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW); + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); debug_unregister(cf_diag_dbg); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1266194afb02..20e07fdb3710 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb, gfp_t gfp_flags) { int i, rc; - unsigned long *new, *tail; + unsigned long *new, *tail, *tail_prev = NULL; if (!sfb->sdbt || !sfb->tail) return -EINVAL; @@ -232,6 +232,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->num_sdbt++; /* Link current page to tail of chain */ *tail = (unsigned long)(void *) new + 1; + tail_prev = tail; tail = new; } @@ -241,10 +242,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * issue, a new realloc call (if required) might succeed. */ rc = alloc_sample_data_block(tail, gfp_flags); - if (rc) + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long) new); + tail = tail_prev; + } break; + } sfb->num_sdb++; tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ } /* Link sampling buffer to its origin */ @@ -1248,18 +1261,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index fcb6c2e92b07..1e75cc983546 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, current, regs, 0) - perf_callchain_store(entry, state.ip); + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } } /* Perf definitions for PMU event attributes in sysfs */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9f2727bf3cbe..e72b1cde227d 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -105,6 +105,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, p->thread.system_timer = 0; p->thread.hardirq_timer = 0; p->thread.softirq_timer = 0; + p->thread.last_break = 1; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5de13307b703..28b6cddc7d07 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -158,8 +158,9 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); - if (!n) + if (n == first) show_cpu_summary(m, v); if (!machine_has_cpu_mhz) return 0; @@ -172,6 +173,8 @@ static inline void *c_update(loff_t *pos) { if (*pos) *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ad71132374f0..5aa786063eb3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -324,6 +324,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -335,7 +354,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -353,7 +374,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -719,6 +744,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } @@ -844,11 +873,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ clear_pt_regs_flag(regs, PIF_SYSCALL); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f8544d517430..8fe09a672396 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -73,6 +73,7 @@ #include <asm/nospec-branch.h> #include <asm/mem_detect.h> #include <asm/uv.h> +#include <asm/asm-offsets.h> #include "entry.h" /* @@ -467,6 +468,8 @@ static void __init setup_lowcore_dat_off(void) arch_spin_lock_setup(0); #endif lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; @@ -1066,7 +1069,7 @@ static void __init log_component_list(void) if (!early_ipl_comp_list_addr) return; - if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR) + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35fafa2b91a8..9760101e9d47 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; if (vdso_alloc_per_cpu(lc)) @@ -266,10 +268,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); + lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[7] = lc->vdso_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -728,39 +733,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -809,7 +842,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -820,6 +853,8 @@ static void smp_init_secondary(void) S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); + set_cpu_flag(CIF_ASCE_PRIMARY); + set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); preempt_disable(); init_cpu_timer(); @@ -1164,7 +1199,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e8766beee5ad..8ea9db599d38 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -310,6 +310,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 57fd4e902f1f..f515aa7cc9e5 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -46,18 +46,28 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]); - if (unlikely(outside_of_stack(state, sp))) { - if (!update_stack_info(state, sp)) - goto out_err; + if (state->reuse_sp) { + sp = state->sp; + state->reuse_sp = false; + } else { + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } } sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; + if (!__kernel_text_address(ip)) { + /* skip bogus %r14 */ + state->regs = NULL; + return unwind_next_frame(state); + } } else { sf = (struct stack_frame *) state->sp; - sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain); + sp = READ_ONCE_NOCHECK(sf->back_chain); if (likely(sp)) { /* Non-zero back-chain points to the previous frame */ if (unlikely(outside_of_stack(state, sp))) { @@ -65,7 +75,7 @@ bool unwind_next_frame(struct unwind_state *state) goto out_err; } sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = true; } else { /* No back-chain, look for a pt_regs structure */ @@ -73,9 +83,9 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_stop; regs = (struct pt_regs *) sp; - if (user_mode(regs)) + if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) goto out_stop; - ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; } } @@ -107,9 +117,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, { struct stack_info *info = &state->stack_info; unsigned long *mask = &state->stack_mask; + bool reliable, reuse_sp; struct stack_frame *sf; unsigned long ip; - bool reliable; memset(state, 0, sizeof(*state)); state->task = task; @@ -132,12 +142,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Get the instruction pointer from pt_regs or the stack frame */ if (regs) { - ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; + reuse_sp = true; } else { sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; + reuse_sp = false; } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -151,5 +163,6 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = sp; state->ip = ip; state->reliable = reliable; + state->reuse_sp = reuse_sp; } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1c..4a66a1cb919b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a..0c79caa32b59 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 149525b5df1b..f5d3b8711e20 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2208,7 +2208,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2517,7 +2517,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2597,7 +2597,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 681deaf9f20a..41455c6caae8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1927,6 +1927,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } + if (start >= slots->used_slots) + return slots->used_slots - 1; + if (gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); @@ -2865,9 +2868,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | CR14_UNUSED_33 | CR14_EXTERNAL_DAMAGE_SUBMASK; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; + vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; @@ -4352,7 +4353,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 076090f9e666..4f6c22d72072 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1202,6 +1202,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->iprcc = PGM_ADDRESSING; scb_s->pgmilc = 4; scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4); + rc = 1; } return rc; } diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c4f8039a35e8..0267405ab7c6 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -64,10 +64,13 @@ mm_segment_t enable_sacf_uaccess(void) { mm_segment_t old_fs; unsigned long asce, cr; + unsigned long flags; old_fs = current->thread.mm_segment; if (old_fs & 1) return old_fs; + /* protect against a concurrent page table upgrade */ + local_irq_save(flags); current->thread.mm_segment |= 1; asce = S390_lowcore.kernel_asce; if (likely(old_fs == USER_DS)) { @@ -83,6 +86,7 @@ mm_segment_t enable_sacf_uaccess(void) __ctl_load(asce, 7, 7); set_cpu_flag(CIF_ASCE_SECONDARY); } + local_irq_restore(flags); return old_fs; } EXPORT_SYMBOL(enable_sacf_uaccess); diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 510a18299196..a51c892f14f3 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, } if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) + len = min(*lenp, sizeof(buf)); + if (copy_from_user(buf, buffer, len)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; + buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); cmm_set_timeout(nr, seconds); + *ppos += *lenp; } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, len = *lenp; if (copy_to_user(buffer, buf, len)) return -EFAULT; + *lenp = len; + *ppos += len; } - *lenp = len; - *ppos += len; return 0; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1e668b95e0c6..f41053055c6f 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -787,14 +787,18 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { + const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table; if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; - if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11))) + + if (asce_type != _ASCE_TYPE_REGION1 && + gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; + table = gmap->table; switch (gmap->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: @@ -1840,6 +1844,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, goto out_free; } else if (*table & _REGION_ENTRY_ORIGIN) { rc = -EAGAIN; /* Race with shadow */ + goto out_free; } crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ @@ -2480,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index b0246c705a19..ff8234bca56c 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -2,7 +2,7 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007,2016 + * Copyright IBM Corp. 2007,2020 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ @@ -11,6 +11,9 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +#include <linux/mman.h> +#include <linux/sched/mm.h> +#include <linux/security.h> /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -114,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste) _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_DIRTY); + _PAGE_SOFT_DIRTY); #endif pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); @@ -156,10 +159,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ - if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; - else + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + if (likely(pte_present(pte))) + rste |= _REGION3_ENTRY_LARGE; + rste |= _REGION_ENTRY_TYPE_R3; + } else if (likely(pte_present(pte))) rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } @@ -267,3 +273,98 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + goto check_asce_limit; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + goto check_asce_limit; + } + + if (mm->get_unmapped_area == arch_get_unmapped_area) + addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + addr = hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 818deeb1ebc3..b1cba90ce49c 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -69,7 +69,7 @@ void notrace s390_kernel_write(void *dst, const void *src, size_t size) } } -static int __memcpy_real(void *dest, void *src, size_t count) +static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; @@ -90,19 +90,23 @@ static int __memcpy_real(void *dest, void *src, size_t count) return rc; } -static unsigned long _memcpy_real(unsigned long dest, unsigned long src, - unsigned long count) +static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, + unsigned long src, + unsigned long count) { int irqs_disabled, rc; unsigned long flags; if (!count) return 0; - flags = __arch_local_irq_stnsm(0xf8UL); + flags = arch_local_irq_save(); irqs_disabled = arch_irqs_disabled_flags(flags); if (!irqs_disabled) trace_hardirqs_off(); + __arch_local_irq_stnsm(0xf8); // disable DAT rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); + if (flags & PSW_MASK_DAT) + __arch_local_irq_stosm(0x04); // enable DAT if (!irqs_disabled) trace_hardirqs_on(); __arch_local_irq_ssm(flags); @@ -114,9 +118,15 @@ static unsigned long _memcpy_real(unsigned long dest, unsigned long src, */ int memcpy_real(void *dest, void *src, size_t count) { - if (S390_lowcore.nodat_stack != 0) - return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, - 3, dest, src, count); + int rc; + + if (S390_lowcore.nodat_stack != 0) { + preempt_disable(); + rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3, + dest, src, count); + preempt_enable(); + return rc; + } /* * This is a really early memcpy_real call, the stacks are * not set up yet. Just call _memcpy_real on the early boot diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 99e06213a22b..2b366abca8c1 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -72,8 +72,20 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - if (current->active_mm == mm) - set_user_asce(mm); + /* we must change all active ASCEs to avoid the creation of new TLBs */ + if (current->active_mm == mm) { + S390_lowcore.user_asce = mm->context.asce; + if (current->thread.mm_segment == USER_DS) { + __ctl_load(S390_lowcore.user_asce, 1, 1); + /* Mark user-ASCE present in CR1 */ + clear_cpu_flag(CIF_ASCE_PRIMARY); + } + if (current->thread.mm_segment == USER_DS_SACF) { + __ctl_load(S390_lowcore.user_asce, 7, 7); + /* enable_sacf_uaccess does all or nothing */ + WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY)); + } + } __tlb_flush_local(); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b403fa14847d..f810930aff42 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -415,6 +415,10 @@ void __init vmem_map_init(void) SET_MEMORY_RO | SET_MEMORY_X); __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + + /* we need lowcore executable for our LPSWE instructions */ + set_memory_x(0, 1); + pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 86ca7f88fb22..42f360fa8833 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -423,7 +423,7 @@ static void zpci_map_resources(struct pci_dev *pdev) if (static_branch_likely(&have_mio)) pdev->resource[i].start = - (resource_size_t __force) zdev->bars[i].mio_wb; + (resource_size_t __force) zdev->bars[i].mio_wt; else pdev->resource[i].start = (resource_size_t __force) pci_iomap(pdev, i, 0); @@ -529,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, flags |= IORESOURCE_MEM_64; if (static_branch_likely(&have_mio)) - addr = (unsigned long) zdev->bars[i].mio_wb; + addr = (unsigned long) zdev->bars[i].mio_wt; else addr = ZPCI_ADDR(entry); size = 1UL << zdev->bars[i].size; @@ -889,8 +889,10 @@ static int __init pci_base_init(void) if (!test_facility(69) || !test_facility(71)) return 0; - if (test_facility(153) && !s390_pci_no_mio) + if (test_facility(153) && !s390_pci_no_mio) { static_branch_enable(&have_mio); + ctl_set_bit(2, 5); + } rc = zpci_debug_init(); if (rc) @@ -931,5 +933,5 @@ subsys_initcall_sync(pci_base_init); void zpci_rescan(void) { if (zpci_is_enabled()) - clp_rescan_pci_devices_simple(); + clp_rescan_pci_devices_simple(NULL); } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d03631dba7c2..64674b49d951 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -240,12 +240,14 @@ error: } /* - * Enable/Disable a given PCI function defined by its function handle. + * Enable/Disable a given PCI function and update its function handle if + * necessary */ -static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) +static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; int rc, retries = 100; + u32 fid = zdev->fid; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) @@ -256,7 +258,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_SET_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = *fh; + rrb->request.fh = zdev->fh; rrb->request.oc = command; rrb->request.ndas = nr_dma_as; @@ -269,12 +271,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - *fh = rrb->response.fh; - else { + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Set PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); - rc = -EIO; + } + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + zdev->fh = rrb->response.fh; + } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && + rrb->response.fh == 0) { + /* Function is already in desired state - update handle */ + rc = clp_rescan_pci_devices_simple(&fid); } clp_free_block(rrb); return rc; @@ -282,18 +289,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) { - u32 fh = zdev->fh; int rc; - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); - zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); if (rc) goto out; - zdev->fh = fh; if (zdev->mio_capable) { - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); - zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); + zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", + zdev->fid, zdev->fh, rc); if (rc) clp_disable_fh(zdev); } @@ -303,17 +309,13 @@ out: int clp_disable_fh(struct zpci_dev *zdev) { - u32 fh = zdev->fh; int rc; if (!zdev_enabled(zdev)) return 0; - rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); - if (!rc) - zdev->fh = fh; - + rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } @@ -370,10 +372,14 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) static void __clp_update(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; + u32 *fid = data; if (!entry->vendor_id) return; + if (fid && *fid != entry->fid) + return; + zdev = get_zdev_by_fid(entry->fid); if (!zdev) return; @@ -413,7 +419,10 @@ int clp_rescan_pci_devices(void) return rc; } -int clp_rescan_pci_devices_simple(void) +/* Rescan PCI functions and refresh function handles. If fid is non-NULL only + * refresh the handle of the function matching @fid + */ +int clp_rescan_pci_devices_simple(u32 *fid) { struct clp_req_rsp_list_pci *rrb; int rc; @@ -422,7 +431,7 @@ int clp_rescan_pci_devices_simple(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, NULL, __clp_update); + rc = clp_list_pci(rrb, fid, __clp_update); clp_free_block(rrb); return rc; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index d80616ae8dd8..743f257cf2cb 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -115,7 +115,6 @@ static struct irq_chip zpci_irq_chip = { .name = "PCI-MSI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, - .irq_set_affinity = zpci_set_irq_affinity, }; static void zpci_handle_cpu_local_irq(bool rescan) @@ -276,7 +275,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = -EIO; if (hwirq - bit >= msi_vecs) break; - irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity); + irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, + (irq_delivery == DIRECTED) ? + msi->affinity : NULL); if (irq < 0) return -ENOMEM; rc = irq_set_msi_desc(irq, msi); @@ -284,7 +285,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return rc; irq_set_chip_and_handler(irq, &zpci_irq_chip, handle_percpu_irq); - msg.data = hwirq; + msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { msg.address_lo = zdev->msi_addr & 0xff0000ff; msg.address_lo |= msi->affinity ? diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 7d42a8794f10..020a2c514d96 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -11,6 +11,113 @@ #include <linux/mm.h> #include <linux/errno.h> #include <linux/pci.h> +#include <asm/pci_io.h> +#include <asm/pci_debug.h> + +static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset) +{ + struct { + u64 offset; + u8 cc; + u8 status; + } data = {offset, cc, status}; + + zpci_err_hex(&data, sizeof(data)); +} + +static inline int __pcistb_mio_inuser( + void __iomem *ioaddr, const void __user *src, + u64 len, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " sacf 256\n" + "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n" + "1: ipm %[cc]\n" + " srl %[cc],28\n" + "2: sacf 768\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : [cc] "+d" (cc), [len] "+d" (len) + : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src)) + : "cc", "memory"); + *status = len >> 24 & 0xff; + return cc; +} + +static inline int __pcistg_mio_inuser( + void __iomem *ioaddr, const void __user *src, + u64 ulen, u8 *status) +{ + register u64 addr asm("2") = (u64 __force) ioaddr; + register u64 len asm("3") = ulen; + int cc = -ENXIO; + u64 val = 0; + u64 cnt = ulen; + u8 tmp; + + /* + * copy 0 < @len <= 8 bytes from @src into the right most bytes of + * a register, then store it to PCI at @ioaddr while in secondary + * address space. pcistg then uses the user mappings. + */ + asm volatile ( + " sacf 256\n" + "0: llgc %[tmp],0(%[src])\n" + " sllg %[val],%[val],8\n" + " aghi %[src],1\n" + " ogr %[val],%[tmp]\n" + " brctg %[cnt],0b\n" + "1: .insn rre,0xb9d40000,%[val],%[ioaddr]\n" + "2: ipm %[cc]\n" + " srl %[cc],28\n" + "3: sacf 768\n" + EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) + : + [src] "+a" (src), [cnt] "+d" (cnt), + [val] "+d" (val), [tmp] "=d" (tmp), + [len] "+d" (len), [cc] "+d" (cc), + [ioaddr] "+a" (addr) + :: "cc", "memory"); + *status = len >> 24 & 0xff; + + /* did we read everything from user memory? */ + if (!cc && cnt != 0) + cc = -EFAULT; + + return cc; +} + +static inline int __memcpy_toio_inuser(void __iomem *dst, + const void __user *src, size_t n) +{ + int size, rc = 0; + u8 status = 0; + mm_segment_t old_fs; + + if (!src) + return -EINVAL; + + old_fs = enable_sacf_uaccess(); + while (n > 0) { + size = zpci_get_max_write_size((u64 __force) dst, + (u64 __force) src, n, + ZPCI_MAX_WRITE_SIZE); + if (size > 8) /* main path */ + rc = __pcistb_mio_inuser(dst, src, size, &status); + else + rc = __pcistg_mio_inuser(dst, src, size, &status); + if (rc) + break; + src += size; + dst += size; + n -= size; + } + disable_sacf_uaccess(old_fs); + if (rc) + zpci_err_mmio(rc, status, (__force u64) dst); + return rc; +} static long get_pfn(unsigned long user_addr, unsigned long access, unsigned long *pfn) @@ -46,6 +153,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) return -EINVAL; + + /* + * Only support read access to MIO capable devices on a MIO enabled + * system. Otherwise we would have to check for every address if it is + * a special ZPCI_ADDR and we would have to do a get_pfn() which we + * don't need for MIO capable devices. + */ + if (static_branch_likely(&have_mio)) { + ret = __memcpy_toio_inuser((void __iomem *) mmio_addr, + user_buffer, + length); + return ret; + } + if (length > 64) { buf = kmalloc(length, GFP_KERNEL); if (!buf) @@ -56,7 +177,8 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, ret = get_pfn(mmio_addr, VM_WRITE, &pfn); if (ret) goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | + (mmio_addr & ~PAGE_MASK)); ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) @@ -72,6 +194,78 @@ out: return ret; } +static inline int __pcilg_mio_inuser( + void __user *dst, const void __iomem *ioaddr, + u64 ulen, u8 *status) +{ + register u64 addr asm("2") = (u64 __force) ioaddr; + register u64 len asm("3") = ulen; + u64 cnt = ulen; + int shift = ulen * 8; + int cc = -ENXIO; + u64 val, tmp; + + /* + * read 0 < @len <= 8 bytes from the PCI memory mapped at @ioaddr (in + * user space) into a register using pcilg then store these bytes at + * user address @dst + */ + asm volatile ( + " sacf 256\n" + "0: .insn rre,0xb9d60000,%[val],%[ioaddr]\n" + "1: ipm %[cc]\n" + " srl %[cc],28\n" + " ltr %[cc],%[cc]\n" + " jne 4f\n" + "2: ahi %[shift],-8\n" + " srlg %[tmp],%[val],0(%[shift])\n" + "3: stc %[tmp],0(%[dst])\n" + " aghi %[dst],1\n" + " brctg %[cnt],2b\n" + "4: sacf 768\n" + EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) + : + [cc] "+d" (cc), [val] "=d" (val), [len] "+d" (len), + [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), + [shift] "+d" (shift) + : + [ioaddr] "a" (addr) + : "cc", "memory"); + + /* did we write everything to the user space buffer? */ + if (!cc && cnt != 0) + cc = -EFAULT; + + *status = len >> 24 & 0xff; + return cc; +} + +static inline int __memcpy_fromio_inuser(void __user *dst, + const void __iomem *src, + unsigned long n) +{ + int size, rc = 0; + u8 status; + mm_segment_t old_fs; + + old_fs = enable_sacf_uaccess(); + while (n > 0) { + size = zpci_get_max_write_size((u64 __force) src, + (u64 __force) dst, n, + ZPCI_MAX_READ_SIZE); + rc = __pcilg_mio_inuser(dst, src, size, &status); + if (rc) + break; + src += size; + dst += size; + n -= size; + } + disable_sacf_uaccess(old_fs); + if (rc) + zpci_err_mmio(rc, status, (__force u64) dst); + return rc; +} + SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, void __user *, user_buffer, size_t, length) { @@ -86,12 +280,27 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) return -EINVAL; + + /* + * Only support write access to MIO capable devices on a MIO enabled + * system. Otherwise we would have to check for every address if it is + * a special ZPCI_ADDR and we would have to do a get_pfn() which we + * don't need for MIO capable devices. + */ + if (static_branch_likely(&have_mio)) { + ret = __memcpy_fromio_inuser( + user_buffer, (const void __iomem *)mmio_addr, + length); + return ret; + } + if (length > 64) { buf = kmalloc(length, GFP_KERNEL); if (!buf) return -ENOMEM; - } else + } else { buf = local_buf; + } ret = get_pfn(mmio_addr, VM_READ, &pfn); if (ret) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 430c14b006d1..0e11fc023fe7 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -13,6 +13,8 @@ #include <linux/stat.h> #include <linux/pci.h> +#include "../../../drivers/pci/pci.h" + #include <asm/sclp.h> #define zpci_attr(name, fmt, member) \ @@ -40,31 +42,50 @@ zpci_attr(segment3, "0x%02x\n", pfip[3]); static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct kernfs_node *kn; struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); - int ret; - - if (!device_remove_file_self(dev, attr)) - return count; - + int ret = 0; + + /* Can't use device_remove_self() here as that would lead us to lock + * the pci_rescan_remove_lock while holding the device' kernfs lock. + * This would create a possible deadlock with disable_slot() which is + * not directly protected by the device' kernfs lock but takes it + * during the device removal which happens under + * pci_rescan_remove_lock. + * + * This is analogous to sdev_store_delete() in + * drivers/scsi/scsi_sysfs.c + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + WARN_ON_ONCE(!kn); + /* device_remove_file() serializes concurrent calls ignoring all but + * the first + */ + device_remove_file(dev, attr); + + /* A concurrent call to recover_store() may slip between + * sysfs_break_active_protection() and the sysfs file removal. + * Once it unblocks from pci_lock_rescan_remove() the original pdev + * will already be removed. + */ pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); - ret = zpci_disable_device(zdev); - if (ret) - goto error; - - ret = zpci_enable_device(zdev); - if (ret) - goto error; - - pci_rescan_bus(zdev->bus); + if (pci_dev_is_added(pdev)) { + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + goto out; + + ret = zpci_enable_device(zdev); + if (ret) + goto out; + pci_rescan_bus(zdev->bus); + } +out: pci_unlock_rescan_remove(); - - return count; - -error: - pci_unlock_rescan_remove(); - return ret; + if (kn) + sysfs_unbreak_active_protection(kn); + return ret ? ret : count; } static DEVICE_ATTR_WO(recover); diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index dc1ae4ff79d7..ff318acb3d77 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -13,8 +13,10 @@ $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE - $(call if_changed_rule,cc_o_c) +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c new file mode 100644 index 000000000000..c98c22a72db7 --- /dev/null +++ b/arch/s390/purgatory/string.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define __HAVE_ARCH_MEMCMP /* arch function */ +#include "../lib/string.c" diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index 16b4d8b0bb85..2c44b94f82fb 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -82,6 +82,9 @@ device_initcall(landisk_devices_setup); static void __init landisk_setup(char **cmdline_p) { + /* I/O port identity mapping */ + __set_io_port_base(0); + /* LED ON */ __raw_writeb(__raw_readb(PA_LED) | 0x03, PA_LED); diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index b56f908b1395..c267035421f1 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -14,6 +14,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address); extern void pmd_free(struct mm_struct *mm, pmd_t *pmd); +#define __pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, (pmdp)) #endif static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -70,15 +71,6 @@ do { \ tlb_remove_page((tlb), (pte)); \ } while (0) -#if CONFIG_PGTABLE_LEVELS > 2 -#define __pmd_free_tlb(tlb, pmdp, addr) \ -do { \ - struct page *page = virt_to_page(pmdp); \ - pgtable_pmd_page_dtor(page); \ - tlb_remove_page((tlb), page); \ -} while (0); -#endif - static inline void check_pgt_cache(void) { quicklist_trim(QUICK_PT, NULL, 25, 16); diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d48818..b887cc402b71 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7734.h b/arch/sh/include/cpu-sh4/cpu/sh7734.h index 96f0246ad2f2..82b63208135a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7734.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7734.h @@ -134,7 +134,7 @@ enum { GPIO_FN_EX_WAIT1, GPIO_FN_SD1_DAT0_A, GPIO_FN_DREQ2, GPIO_FN_CAN1_TX_C, GPIO_FN_ET0_LINK_C, GPIO_FN_ET0_ETXD5_A, GPIO_FN_EX_WAIT0, GPIO_FN_TCLK1_B, - GPIO_FN_RD_WR, GPIO_FN_TCLK0, + GPIO_FN_RD_WR, GPIO_FN_TCLK0, GPIO_FN_CAN_CLK_B, GPIO_FN_ET0_ETXD4, GPIO_FN_EX_CS5, GPIO_FN_SD1_CMD_A, GPIO_FN_ATADIR, GPIO_FN_QSSL_B, GPIO_FN_ET0_ETXD3_A, GPIO_FN_EX_CS4, GPIO_FN_SD1_WP_A, GPIO_FN_ATAWR, GPIO_FN_QMI_QIO1_B, diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index d31f66e82ce5..4a8ec9e40cc2 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -199,7 +199,7 @@ syscall_trace_entry: mov.l @(OFF_R7,r15), r7 ! arg3 mov.l @(OFF_R3,r15), r3 ! syscall_nr ! - mov.l 2f, r10 ! Number of syscalls + mov.l 6f, r10 ! Number of syscalls cmp/hs r10, r3 bf syscall_call mov #-ENOSYS, r0 @@ -353,7 +353,7 @@ ENTRY(system_call) tst r9, r8 bf syscall_trace_entry ! - mov.l 2f, r8 ! Number of syscalls + mov.l 6f, r8 ! Number of syscalls cmp/hs r8, r3 bt syscall_badsys ! @@ -392,7 +392,7 @@ syscall_exit: #if !defined(CONFIG_CPU_SH2) 1: .long TRA #endif -2: .long NR_syscalls +6: .long NR_syscalls 3: .long sys_call_table 7: .long do_syscall_trace_enter 8: .long do_syscall_trace_leave diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 26ab6f5bbaaf..4dcb8cd8f612 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -64,7 +64,6 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 688911051b44..f4afa301954a 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -407,6 +407,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) } #define ioremap_nocache(X,Y) ioremap((X),(Y)) +#define ioremap_uc(X,Y) ioremap((X),(Y)) #define ioremap_wc(X,Y) ioremap((X),(Y)) #define ioremap_wt(X,Y) ioremap((X),(Y)) diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index a2f3fa61ee36..8cb8f3833239 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -28,6 +28,15 @@ void flush_tlb_pending(void); #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() +/* + * SPARC64's hardware TLB fill does not use the Linux page-tables + * and therefore we don't need a TLBI when freeing page-table pages. + */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +#define tlb_needs_table_invalidate() (false) +#endif + #include <asm-generic/tlb.h> #endif /* _SPARC64_TLB_H */ diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..084b8949ddff 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -15,19 +15,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 16b50afe7b52..646dd58169ec 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -46,82 +46,79 @@ enum sparc_regset { REGSET_FP, }; +static int regwindow32_get(struct task_struct *target, + const struct pt_regs *regs, + u32 *uregs) +{ + unsigned long reg_window = regs->u_regs[UREG_I6]; + int size = 16 * sizeof(u32); + + if (target == current) { + if (copy_from_user(uregs, (void __user *)reg_window, size)) + return -EFAULT; + } else { + if (access_process_vm(target, reg_window, uregs, size, + FOLL_FORCE) != size) + return -EFAULT; + } + return 0; +} + +static int regwindow32_set(struct task_struct *target, + const struct pt_regs *regs, + u32 *uregs) +{ + unsigned long reg_window = regs->u_regs[UREG_I6]; + int size = 16 * sizeof(u32); + + if (target == current) { + if (copy_to_user((void __user *)reg_window, uregs, size)) + return -EFAULT; + } else { + if (access_process_vm(target, reg_window, uregs, size, + FOLL_FORCE | FOLL_WRITE) != size) + return -EFAULT; + } + return 0; +} + static int genregs32_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { const struct pt_regs *regs = target->thread.kregs; - unsigned long __user *reg_window; - unsigned long *k = kbuf; - unsigned long __user *u = ubuf; - unsigned long reg; + u32 uregs[16]; + int ret; if (target == current) flush_user_windows(); - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) { - for (; count > 0 && pos < 16; count--) - *k++ = regs->u_regs[pos++]; - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (get_user(*k++, ®_window[pos++])) - return -EFAULT; - } - } else { - for (; count > 0 && pos < 16; count--) { - if (put_user(regs->u_regs[pos++], u++)) - return -EFAULT; - } - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (get_user(reg, ®_window[pos++]) || - put_user(reg, u++)) - return -EFAULT; - } - } - while (count > 0) { - switch (pos) { - case 32: /* PSR */ - reg = regs->psr; - break; - case 33: /* PC */ - reg = regs->pc; - break; - case 34: /* NPC */ - reg = regs->npc; - break; - case 35: /* Y */ - reg = regs->y; - break; - case 36: /* WIM */ - case 37: /* TBR */ - reg = 0; - break; - default: - goto finish; - } + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->u_regs, + 0, 16 * sizeof(u32)); + if (ret || !count) + return ret; - if (kbuf) - *k++ = reg; - else if (put_user(reg, u++)) + if (pos < 32 * sizeof(u32)) { + if (regwindow32_get(target, regs, uregs)) return -EFAULT; - pos++; - count--; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + uregs, + 16 * sizeof(u32), 32 * sizeof(u32)); + if (ret || !count) + return ret; } -finish: - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 38 * sizeof(reg), -1); + uregs[0] = regs->psr; + uregs[1] = regs->pc; + uregs[2] = regs->npc; + uregs[3] = regs->y; + uregs[4] = 0; /* WIM */ + uregs[5] = 0; /* TBR */ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + uregs, + 32 * sizeof(u32), 38 * sizeof(u32)); } static int genregs32_set(struct task_struct *target, @@ -130,82 +127,58 @@ static int genregs32_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { struct pt_regs *regs = target->thread.kregs; - unsigned long __user *reg_window; - const unsigned long *k = kbuf; - const unsigned long __user *u = ubuf; - unsigned long reg; + u32 uregs[16]; + u32 psr; + int ret; if (target == current) flush_user_windows(); - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) { - for (; count > 0 && pos < 16; count--) - regs->u_regs[pos++] = *k++; - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (put_user(*k++, ®_window[pos++])) - return -EFAULT; - } - } else { - for (; count > 0 && pos < 16; count--) { - if (get_user(reg, u++)) - return -EFAULT; - regs->u_regs[pos++] = reg; - } - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (get_user(reg, u++) || - put_user(reg, ®_window[pos++])) - return -EFAULT; - } - } - while (count > 0) { - unsigned long psr; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->u_regs, + 0, 16 * sizeof(u32)); + if (ret || !count) + return ret; - if (kbuf) - reg = *k++; - else if (get_user(reg, u++)) + if (pos < 32 * sizeof(u32)) { + if (regwindow32_get(target, regs, uregs)) return -EFAULT; - - switch (pos) { - case 32: /* PSR */ - psr = regs->psr; - psr &= ~(PSR_ICC | PSR_SYSCALL); - psr |= (reg & (PSR_ICC | PSR_SYSCALL)); - regs->psr = psr; - break; - case 33: /* PC */ - regs->pc = reg; - break; - case 34: /* NPC */ - regs->npc = reg; - break; - case 35: /* Y */ - regs->y = reg; - break; - case 36: /* WIM */ - case 37: /* TBR */ - break; - default: - goto finish; - } - - pos++; - count--; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + uregs, + 16 * sizeof(u32), 32 * sizeof(u32)); + if (ret) + return ret; + if (regwindow32_set(target, regs, uregs)) + return -EFAULT; + if (!count) + return 0; } -finish: - pos *= sizeof(reg); - count *= sizeof(reg); - + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &psr, + 32 * sizeof(u32), 33 * sizeof(u32)); + if (ret) + return ret; + regs->psr = (regs->psr & ~(PSR_ICC | PSR_SYSCALL)) | + (psr & (PSR_ICC | PSR_SYSCALL)); + if (!count) + return 0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->pc, + 33 * sizeof(u32), 34 * sizeof(u32)); + if (ret || !count) + return ret; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->npc, + 34 * sizeof(u32), 35 * sizeof(u32)); + if (ret || !count) + return ret; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->y, + 35 * sizeof(u32), 36 * sizeof(u32)); + if (ret || !count) + return ret; return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - 38 * sizeof(reg), -1); + 36 * sizeof(u32), 38 * sizeof(u32)); } static int fpregs32_get(struct task_struct *target, diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index c9d41a96468f..3f5930bfab06 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -572,19 +572,13 @@ static int genregs32_get(struct task_struct *target, for (; count > 0 && pos < 32; count--) { if (access_process_vm(target, (unsigned long) - ®_window[pos], + ®_window[pos++], ®, sizeof(reg), FOLL_FORCE) != sizeof(reg)) return -EFAULT; - if (access_process_vm(target, - (unsigned long) u, - ®, sizeof(reg), - FOLL_FORCE | FOLL_WRITE) - != sizeof(reg)) + if (put_user(reg, u++)) return -EFAULT; - pos++; - u++; } } } @@ -684,12 +678,7 @@ static int genregs32_set(struct task_struct *target, } } else { for (; count > 0 && pos < 32; count--) { - if (access_process_vm(target, - (unsigned long) - u, - ®, sizeof(reg), - FOLL_FORCE) - != sizeof(reg)) + if (get_user(reg, u++)) return -EFAULT; if (access_process_vm(target, (unsigned long) diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 61afd787bd0c..59b6df13ddea 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -172,12 +172,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index aaebbc00d262..ae22398c3157 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -379,7 +379,6 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) return NULL; page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT); if (!pgtable_page_ctor(page)) { - __free_page(page); return NULL; } return page; diff --git a/arch/um/Makefile b/arch/um/Makefile index 273130cf91d1..fcf9bf31c176 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -140,6 +140,7 @@ export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) # When cleaning we don't include .config, so we don't include # TT or skas makefiles and don't clean skas_ptregs.h. CLEAN_FILES += linux x.i gmon.out +MRPROPER_DIRS += arch/$(SUBARCH)/include/generated archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 33c1cd6a12ac..d75ed5e1e5eb 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1403,8 +1403,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&ubd_dev->lock); - if (ret < 0) - blk_mq_requeue_request(req, true); + if (ret < 0) { + if (ret == -ENOMEM) + res = BLK_STS_RESOURCE; + else + res = BLK_STS_DEV_RESOURCE; + } return res; } @@ -1602,7 +1606,9 @@ int io_thread(void *arg) written = 0; do { - res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); + res = os_write_file(kernel_fd, + ((char *) io_req_buffer) + written, + n - written); if (res >= 0) { written += res; } diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index f25b110d4e70..60bc6a1c7ae6 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -8,6 +8,7 @@ #include <errno.h> #include <fcntl.h> #include <signal.h> +#include <linux/falloc.h> #include <sys/ioctl.h> #include <sys/mount.h> #include <sys/socket.h> diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index e2839b5c246c..6539c50fb9aa 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -87,7 +87,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 15255f388a85..c4ff6580cfeb 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -386,7 +386,13 @@ int count_immovable_mem_regions(void) table = table_addr + sizeof(struct acpi_table_srat); while (table + sizeof(struct acpi_subtable_header) < table_end) { + sub_table = (struct acpi_subtable_header *)table; + if (!sub_table->length) { + debug_putstr("Invalid zero length SRAT subtable.\n"); + return 0; + } + if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) { struct acpi_srat_mem_affinity *ma; diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 220d1279d0e2..712f8687ce82 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -13,6 +13,7 @@ #include <asm/e820/types.h> #include <asm/setup.h> #include <asm/desc.h> +#include <asm/boot.h> #include "../string.h" #include "eboot.h" @@ -821,7 +822,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) status = efi_relocate_kernel(sys_table, &bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, - hdr->kernel_alignment); + hdr->kernel_alignment, + LOAD_PHYSICAL_ADDR); if (status != EFI_SUCCESS) { efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); goto fail; diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 37380c0d5999..c6c4b877f3d2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -49,16 +49,17 @@ * Position Independent Executable (PIE) so that linker won't optimize * R_386_GOT32X relocation to its fixed symbol address. Older * linkers generate R_386_32 relocations against locally defined symbols, - * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less + * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle * R_386_32 relocations when relocating the kernel. To generate - * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as + * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as * hidden: */ .hidden _bss .hidden _ebss .hidden _got .hidden _egot + .hidden _end __HEAD ENTRY(startup_32) @@ -106,7 +107,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fafb75c6c592..0ad88e343ec5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -42,6 +42,7 @@ .hidden _ebss .hidden _got .hidden _egot + .hidden _end __HEAD .code32 @@ -106,7 +107,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: @@ -244,6 +245,11 @@ ENTRY(efi32_stub_entry) leal efi32_config(%ebp), %eax movl %eax, efi_config(%ebp) + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + jmp startup_32 ENDPROC(efi32_stub_entry) #endif @@ -292,7 +298,7 @@ ENTRY(startup_64) notq %rax andq %rax, %rbp cmpq $LOAD_PHYSICAL_ADDR, %rbp - jge 1f + jae 1f #endif movq $LOAD_PHYSICAL_ADDR, %rbp 1: diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 748456c365f4..9557c5a15b91 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -29,9 +29,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 76e1edf5bf12..34c6aff410d9 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -12,7 +12,9 @@ * It is not referenced from the code, but GCC < 5 with -fPIE would fail * due to an undefined symbol. Define it to make these ancient GCCs work. */ +#ifndef CONFIG_RANDOMIZE_BASE unsigned long __force_order; +#endif #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 5f6a5af9c489..77043a82da51 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -127,10 +127,6 @@ ddq_add_8: /* generate a unique variable for ddq_add_x */ -.macro setddq n - var_ddq_add = ddq_add_\n -.endm - /* generate a unique variable for xmm register */ .macro setxdata n var_xdata = %xmm\n @@ -140,9 +136,7 @@ ddq_add_8: .macro club name, id .altmacro - .if \name == DDQ_DATA - setddq %\id - .elseif \name == XDATA + .if \name == XDATA setxdata %\id .endif .noaltmacro @@ -165,9 +159,8 @@ ddq_add_8: .set i, 1 .rept (by - 1) - club DDQ_DATA, i club XDATA, i - vpaddq var_ddq_add(%rip), xcounter, var_xdata + vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata vptest ddq_low_msk(%rip), var_xdata jnz 1f vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata @@ -180,8 +173,7 @@ ddq_add_8: vmovdqa 1*16(p_keys), xkeyA vpxor xkey0, xdata0, xdata0 - club DDQ_DATA, by - vpaddq var_ddq_add(%rip), xcounter, xcounter + vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter vptest ddq_low_msk(%rip), xcounter jnz 1f vpaddq ddq_high_add_1(%rip), xcounter, xcounter diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index e40bdf024ba7..9afeb58c910e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -266,7 +266,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff PSHUFB_XMM %xmm2, %xmm0 movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv - PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 movdqu HashKey(%arg2), %xmm13 CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ @@ -978,7 +978,7 @@ _initial_blocks_done\@: * arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ -.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ +.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM1, \XMM5 @@ -1186,7 +1186,7 @@ aes_loop_par_enc_done\@: * arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ -.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ +.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM1, \XMM5 diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index f7567cbd35b6..80fcb85736e1 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index a661ede3b5cf..cc6b7c1a2705 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index d6f2e29be3e2..b3f121478738 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 - /* - * Push registers and sanitize registers of values that a - * speculation attack might otherwise want to exploit. The - * lower registers are likely clobbered well before they - * could be put to use in a speculative execution gadget. - * Interleave XOR with PUSH for better uop scheduling: - */ .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ .endif pushq \rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ pushq %rcx /* pt_regs->cx */ - xorl %ecx, %ecx /* nospec cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret pushq %rsi /* return address on top of stack */ .endif + + /* + * Sanitize registers of values that a speculation attack might + * otherwise want to exploit. The lower registers are likely clobbered + * well before they could be put to use in a speculative execution + * gadget. + */ + xorl %edx, %edx /* nospec dx */ + xorl %ecx, %ecx /* nospec cx */ + xorl %r8d, %r8d /* nospec r8 */ + xorl %r9d, %r9d /* nospec r9 */ + xorl %r10d, %r10d /* nospec r10 */ + xorl %r11d, %r11d /* nospec r11 */ + xorl %ebx, %ebx /* nospec rbx */ + xorl %ebp, %ebp /* nospec rbp */ + xorl %r12d, %r12d /* nospec r12 */ + xorl %r13d, %r13d /* nospec r13 */ + xorl %r14d, %r14d /* nospec r14 */ + xorl %r15d, %r15d /* nospec r15 */ + .endm .macro POP_REGS pop_rdi=1 skip_r11rcx=0 @@ -172,21 +174,6 @@ For 32-bit we have the following conventions - kernel is built with .endif .endm -/* - * This is a sneaky trick to help the unwinder find pt_regs on the stack. The - * frame pointer is replaced with an encoded pointer to pt_regs. The encoding - * is just setting the LSB, which makes it an invalid stack address and is also - * a signal to the unwinder that it's a pt_regs pointer in disguise. - * - * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts - * the original rbp. - */ -.macro ENCODE_FRAME_POINTER ptregs_offset=0 -#ifdef CONFIG_FRAME_POINTER - leaq 1+\ptregs_offset(%rsp), %rbp -#endif -.endm - #ifdef CONFIG_PAGE_TABLE_ISOLATION /* @@ -375,3 +362,9 @@ For 32-bit we have the following conventions - kernel is built with .Lafter_call_\@: #endif .endm + +#ifdef CONFIG_PARAVIRT_XXL +#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg +#else +#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg +#endif diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2418804e66b4..73c216d73417 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -290,15 +290,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) nr = syscall_trace_enter(regs); - /* - * NB: Native and x32 syscalls are dispatched from the same - * table. The only functional difference is the x32 bit in - * regs->orig_ax, which changes the behavior of some syscalls. - */ - nr &= __SYSCALL_MASK; if (likely(nr < NR_syscalls)) { nr = array_index_nospec(nr, NR_syscalls); regs->ax = sys_call_table[nr](regs); +#ifdef CONFIG_X86_X32_ABI + } else if (likely((nr & __X32_SYSCALL_BIT) && + (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { + nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, + X32_NR_syscalls); + regs->ax = x32_sys_call_table[nr](regs); +#endif } syscall_return_slowpath(regs); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f49e11669271..e5530e74b738 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -67,7 +67,6 @@ # define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else # define preempt_stop(clobbers) -# define resume_kernel restore_all_kernel #endif .macro TRACE_IRQS_IRET @@ -173,7 +172,7 @@ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI .if \no_user_check == 0 /* coming from usermode? */ - testl $SEGMENT_RPL_MASK, PT_CS(%esp) + testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp) jz .Lend_\@ .endif /* On user-cr3? */ @@ -203,10 +202,126 @@ .Lend_\@: .endm -.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 +#define CS_FROM_ENTRY_STACK (1 << 31) +#define CS_FROM_USER_CR3 (1 << 30) +#define CS_FROM_KERNEL (1 << 29) +#define CS_FROM_ESPFIX (1 << 28) + +.macro FIXUP_FRAME + /* + * The high bits of the CS dword (__csh) are used for CS_FROM_*. + * Clear them in case hardware didn't do this for us. + */ + andl $0x0000ffff, 4*4(%esp) + +#ifdef CONFIG_VM86 + testl $X86_EFLAGS_VM, 5*4(%esp) + jnz .Lfrom_usermode_no_fixup_\@ +#endif + testl $USER_SEGMENT_RPL_MASK, 4*4(%esp) + jnz .Lfrom_usermode_no_fixup_\@ + + orl $CS_FROM_KERNEL, 4*4(%esp) + + /* + * When we're here from kernel mode; the (exception) stack looks like: + * + * 6*4(%esp) - <previous context> + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs + * + * Lets build a 5 entry IRET frame after that, such that struct pt_regs + * is complete and in particular regs->sp is correct. This gives us + * the original 6 enties as gap: + * + * 14*4(%esp) - <previous context> + * 13*4(%esp) - gap / flags + * 12*4(%esp) - gap / cs + * 11*4(%esp) - gap / ip + * 10*4(%esp) - gap / orig_eax + * 9*4(%esp) - gap / gs / function + * 8*4(%esp) - gap / fs + * 7*4(%esp) - ss + * 6*4(%esp) - sp + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs + */ + + pushl %ss # ss + pushl %esp # sp (points at ss) + addl $7*4, (%esp) # point sp back at the previous context + pushl 7*4(%esp) # flags + pushl 7*4(%esp) # cs + pushl 7*4(%esp) # ip + pushl 7*4(%esp) # orig_eax + pushl 7*4(%esp) # gs / function + pushl 7*4(%esp) # fs +.Lfrom_usermode_no_fixup_\@: +.endm + +.macro IRET_FRAME + /* + * We're called with %ds, %es, %fs, and %gs from the interrupted + * frame, so we shouldn't use them. Also, we may be in ESPFIX + * mode and therefore have a nonzero SS base and an offset ESP, + * so any attempt to access the stack needs to use SS. (except for + * accesses through %esp, which automatically use SS.) + */ + testl $CS_FROM_KERNEL, 1*4(%esp) + jz .Lfinished_frame_\@ + + /* + * Reconstruct the 3 entry IRET frame right after the (modified) + * regs->sp without lowering %esp in between, such that an NMI in the + * middle doesn't scribble our stack. + */ + pushl %eax + pushl %ecx + movl 5*4(%esp), %eax # (modified) regs->sp + + movl 4*4(%esp), %ecx # flags + movl %ecx, %ss:-1*4(%eax) + + movl 3*4(%esp), %ecx # cs + andl $0x0000ffff, %ecx + movl %ecx, %ss:-2*4(%eax) + + movl 2*4(%esp), %ecx # ip + movl %ecx, %ss:-3*4(%eax) + + movl 1*4(%esp), %ecx # eax + movl %ecx, %ss:-4*4(%eax) + + popl %ecx + lea -4*4(%eax), %esp + popl %eax +.Lfinished_frame_\@: +.endm + +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0 cld +.if \skip_gs == 0 PUSH_GS +.endif pushl %fs + + pushl %eax + movl $(__KERNEL_PERCPU), %eax + movl %eax, %fs +.if \unwind_espfix > 0 + UNWIND_ESPFIX_STACK +.endif + popl %eax + + FIXUP_FRAME pushl %es pushl %ds pushl \pt_regs_ax @@ -219,19 +334,17 @@ movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es - movl $(__KERNEL_PERCPU), %edx - movl %edx, %fs +.if \skip_gs == 0 SET_KERNEL_GS %edx - +.endif /* Switch to kernel stack if necessary */ .if \switch_stacks > 0 SWITCH_TO_KERNEL_STACK .endif - .endm -.macro SAVE_ALL_NMI cr3_reg:req - SAVE_ALL +.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0 + SAVE_ALL unwind_espfix=\unwind_espfix BUG_IF_WRONG_CR3 @@ -247,22 +360,6 @@ .Lend_\@: .endm -/* - * This is a sneaky trick to help the unwinder find pt_regs on the stack. The - * frame pointer is replaced with an encoded pointer to pt_regs. The encoding - * is just clearing the MSB, which makes it an invalid stack address and is also - * a signal to the unwinder that it's a pt_regs pointer in disguise. - * - * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the - * original rbp. - */ -.macro ENCODE_FRAME_POINTER -#ifdef CONFIG_FRAME_POINTER - mov %esp, %ebp - andl $0x7fffffff, %ebp -#endif -.endm - .macro RESTORE_INT_REGS popl %ebx popl %ecx @@ -279,6 +376,7 @@ 2: popl %es 3: popl %fs POP_GS \pop + IRET_FRAME .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -317,7 +415,8 @@ .macro CHECK_AND_APPLY_ESPFIX #ifdef CONFIG_X86_ESPFIX32 -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX @@ -375,9 +474,6 @@ * switch to it before we do any copying. */ -#define CS_FROM_ENTRY_STACK (1 << 31) -#define CS_FROM_USER_CR3 (1 << 30) - .macro SWITCH_TO_KERNEL_STACK ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV @@ -391,13 +487,6 @@ * that register for the time this macro runs */ - /* - * The high bits of the CS dword (__csh) are used for - * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case - * hardware didn't do this for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Are we on the entry stack? Bail out if not! */ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -755,7 +844,7 @@ ret_from_intr: andl $SEGMENT_RPL_MASK, %eax #endif cmpl $USER_RPL, %eax - jb resume_kernel # not returning to v8086 or userspace + jb restore_all_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) @@ -765,18 +854,6 @@ ENTRY(resume_userspace) jmp restore_all END(ret_from_exception) -#ifdef CONFIG_PREEMPT -ENTRY(resume_kernel) - DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0, PER_CPU_VAR(__preempt_count) - jnz restore_all_kernel - testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all_kernel - call preempt_schedule_irq - jmp restore_all_kernel -END(resume_kernel) -#endif - GLOBAL(__begin_SYSENTER_singlestep_region) /* * All code from here through __end_SYSENTER_singlestep_region is subject @@ -1027,6 +1104,15 @@ restore_all: INTERRUPT_RETURN restore_all_kernel: +#ifdef CONFIG_PREEMPT + DISABLE_INTERRUPTS(CLBR_ANY) + cmpl $0, PER_CPU_VAR(__preempt_count) + jnz .Lno_preempt + testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? + jz .Lno_preempt + call preempt_schedule_irq +.Lno_preempt: +#endif TRACE_IRQS_IRET PARANOID_EXIT_TO_KERNEL_MODE BUG_IF_WRONG_CR3 @@ -1062,30 +1148,43 @@ ENDPROC(entry_INT80_32) * We can't call C functions using the ESPFIX stack. This code reads * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. + * + * We might be on user CR3 here, so percpu data is not mapped and we can't + * access the GDT through the percpu segment. Instead, use SGDT to find + * the cpu_entry_area alias of the GDT. */ #ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ + pushl %ecx + subl $2*4, %esp + sgdt (%esp) + movl 2(%esp), %ecx /* GDT address */ + /* + * Careful: ECX is a linear pointer, so we need to force base + * zero. %cs is the only known-linear segment we have right now. + */ + mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */ + mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */ shl $16, %eax + addl $2*4, %esp + popl %ecx addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS pushl %eax lss (%esp), %esp /* switch to the normal stack segment */ #endif .endm + .macro UNWIND_ESPFIX_STACK + /* It's safe to clobber %eax, all other regs need to be preserved */ #ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax - jne 27f - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es + jne .Lno_fixup_\@ /* switch to normal stack */ FIXUP_ESPFIX_STACK -27: +.Lno_fixup_\@: #endif .endm @@ -1275,11 +1374,6 @@ END(spurious_interrupt_bug) #ifdef CONFIG_XEN_PV ENTRY(xen_hypervisor_callback) - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - TRACE_IRQS_OFF - /* * Check to see if we got the event in the critical * region in xen_iret_direct, after we've reenabled @@ -1287,16 +1381,17 @@ ENTRY(xen_hypervisor_callback) * iret instruction's behaviour where it delivers a * pending interrupt when enabling interrupts: */ - movl PT_EIP(%esp), %eax - cmpl $xen_iret_start_crit, %eax + cmpl $xen_iret_start_crit, (%esp) jb 1f - cmpl $xen_iret_end_crit, %eax + cmpl $xen_iret_end_crit, (%esp) jae 1f - - jmp xen_iret_crit_fixup - -ENTRY(xen_do_upcall) -1: mov %esp, %eax + call xen_iret_crit_fixup +1: + pushl $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + mov %esp, %eax call xen_evtchn_do_upcall #ifndef CONFIG_PREEMPT call xen_maybe_preempt_hcall @@ -1378,37 +1473,48 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, ENTRY(page_fault) ASM_CLAC pushl $do_page_fault - ALIGN - jmp common_exception + jmp common_exception_read_cr2 END(page_fault) -common_exception: +common_exception_read_cr2: /* the function address is in %gs's slot on the stack */ - pushl %fs - pushl %es - pushl %ds - pushl %eax - movl $(__USER_DS), %eax - movl %eax, %ds - movl %eax, %es - movl $(__KERNEL_PERCPU), %eax - movl %eax, %fs - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - SWITCH_TO_KERNEL_STACK + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 + ENCODE_FRAME_POINTER - cld - UNWIND_ESPFIX_STACK + + /* fixup %gs */ GS_TO_REG %ecx - movl PT_GS(%esp), %edi # get the function address + movl PT_GS(%esp), %edi + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx + + GET_CR2_INTO(%ecx) # might clobber %eax + + /* fixup orig %eax */ movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + + TRACE_IRQS_OFF + movl %esp, %eax # pt_regs pointer + CALL_NOSPEC %edi + jmp ret_from_exception +END(common_exception_read_cr2) + +common_exception: + /* the function address is in %gs's slot on the stack */ + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 + ENCODE_FRAME_POINTER + + /* fixup %gs */ + GS_TO_REG %ecx + movl PT_GS(%esp), %edi # get the function address REG_TO_PTGS %ecx SET_KERNEL_GS %ecx + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi @@ -1436,6 +1542,10 @@ ENTRY(nmi) ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 + /* + * ESPFIX_SS is only ever set on the return to user path + * after we've switched to the entry stack. + */ pushl %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1471,6 +1581,11 @@ ENTRY(nmi) movl %ebx, %esp .Lnmi_return: +#ifdef CONFIG_X86_ESPFIX32 + testl $CS_FROM_ESPFIX, PT_CS(%esp) + jnz .Lnmi_from_espfix +#endif + CHECK_AND_APPLY_ESPFIX RESTORE_ALL_NMI cr3_reg=%edi pop=4 jmp .Lirq_return @@ -1478,23 +1593,42 @@ ENTRY(nmi) #ifdef CONFIG_X86_ESPFIX32 .Lnmi_espfix_stack: /* - * create the pointer to lss back + * Create the pointer to LSS back */ pushl %ss pushl %esp addl $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - .endr - pushl %eax - SAVE_ALL_NMI cr3_reg=%edi + + /* Copy the (short) IRET frame */ + pushl 4*4(%esp) # flags + pushl 4*4(%esp) # cs + pushl 4*4(%esp) # ip + + pushl %eax # orig_ax + + SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1 ENCODE_FRAME_POINTER - FIXUP_ESPFIX_STACK # %eax == %esp + + /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */ + xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp) + xorl %edx, %edx # zero error code - call do_nmi + movl %esp, %eax # pt_regs pointer + jmp .Lnmi_from_sysenter_stack + +.Lnmi_from_espfix: RESTORE_ALL_NMI cr3_reg=%edi - lss 12+4(%esp), %esp # back to espfix stack + /* + * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to + * fix up the gap and long frame: + * + * 3 - original frame (exception) + * 2 - ESPFIX block (above) + * 6 - gap (FIXUP_FRAME) + * 5 - long frame (FIXUP_FRAME) + * 1 - orig_ax + */ + lss (1+5+6)*4(%esp), %esp # back to espfix stack jmp .Lirq_return #endif END(nmi) @@ -1513,6 +1647,7 @@ ENTRY(int3) END(int3) ENTRY(general_protection) + ASM_CLAC pushl $do_general_protection jmp common_exception END(general_protection) @@ -1521,7 +1656,7 @@ END(general_protection) ENTRY(async_page_fault) ASM_CLAC pushl $do_async_page_fault - jmp common_exception + jmp common_exception_read_cr2 END(async_page_fault) #endif diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 69808aaa6851..b83feb7d9464 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -249,7 +249,6 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) */ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ - UNWIND_HINT_EMPTY POP_REGS pop_rdi=0 skip_r11rcx=1 /* @@ -258,6 +257,7 @@ syscall_return_via_sysret: */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -512,7 +512,7 @@ END(spurious_entries_start) * +----------------------------------------------------+ */ ENTRY(interrupt_entry) - UNWIND_HINT_FUNC + UNWIND_HINT_IRET_REGS offset=16 ASM_CLAC cld @@ -544,9 +544,9 @@ ENTRY(interrupt_entry) pushq 5*8(%rdi) /* regs->eflags */ pushq 4*8(%rdi) /* regs->cs */ pushq 3*8(%rdi) /* regs->ip */ + UNWIND_HINT_IRET_REGS pushq 2*8(%rdi) /* regs->orig_ax */ pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC movq (%rdi), %rdi jmp 2f @@ -637,6 +637,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ @@ -866,18 +867,84 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) +.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0 + + .if \paranoid + call paranoid_entry + /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ + .else + call error_entry + .endif + UNWIND_HINT_REGS + + .if \read_cr2 + /* + * Store CR2 early so subsequent faults cannot clobber it. Use R12 as + * intermediate storage as RDX can be clobbered in enter_from_user_mode(). + * GET_CR2_INTO can clobber RAX. + */ + GET_CR2_INTO(%r12); + .endif + + .if \shift_ist != -1 + TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ + .else + TRACE_IRQS_OFF + .endif + + .if \paranoid == 0 + testb $3, CS(%rsp) + jz .Lfrom_kernel_no_context_tracking_\@ + CALL_enter_from_user_mode +.Lfrom_kernel_no_context_tracking_\@: + .endif + + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code + movq ORIG_RAX(%rsp), %rsi /* get error code */ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi, %esi /* no error code */ + .endif + + .if \shift_ist != -1 + subq $\ist_offset, CPU_TSS_IST(\shift_ist) + .endif + + .if \read_cr2 + movq %r12, %rdx /* Move CR2 into 3rd argument */ + .endif + + call \do_sym + + .if \shift_ist != -1 + addq $\ist_offset, CPU_TSS_IST(\shift_ist) + .endif + + .if \paranoid + /* this procedure expect "no swapgs" flag in ebx */ + jmp paranoid_exit + .else + jmp error_exit + .endif + +.endm + /** * idtentry - Generate an IDT entry stub * @sym: Name of the generated entry point - * @do_sym: C function to be called - * @has_error_code: True if this IDT vector has an error code on the stack - * @paranoid: non-zero means that this vector may be invoked from + * @do_sym: C function to be called + * @has_error_code: True if this IDT vector has an error code on the stack + * @paranoid: non-zero means that this vector may be invoked from * kernel mode with user GSBASE and/or user CR3. * 2 is special -- see below. * @shift_ist: Set to an IST index if entries from kernel mode should - * decrement the IST stack so that nested entries get a + * decrement the IST stack so that nested entries get a * fresh stack. (This is for #DB, which has a nasty habit - * of recursing.) + * of recursing.) + * @create_gap: create a 6-word stack gap when coming from kernel mode. + * @read_cr2: load CR2 into the 3rd argument; done before calling any C code * * idtentry generates an IDT stub that sets up a usable kernel context, * creates struct pt_regs, and calls @do_sym. The stub has the following @@ -902,15 +969,19 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. */ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ - .if \shift_ist != -1 && \paranoid == 0 + .if \shift_ist != -1 && \paranoid != 1 .error "using shift_ist requires paranoid=1" .endif + .if \create_gap && \paranoid + .error "using create_gap requires paranoid=0" + .endif + ASM_CLAC .if \has_error_code == 0 @@ -936,47 +1007,7 @@ ENTRY(\sym) .Lfrom_usermode_no_gap_\@: .endif - .if \paranoid - call paranoid_entry - .else - call error_entry - .endif - UNWIND_HINT_REGS - /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ - - .if \paranoid - .if \shift_ist != -1 - TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ - .else - TRACE_IRQS_OFF - .endif - .endif - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ - .endif - - .if \shift_ist != -1 - subq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - call \do_sym - - .if \shift_ist != -1 - addq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - /* these procedures expect "no swapgs" flag in ebx */ - .if \paranoid - jmp paranoid_exit - .else - jmp error_exit - .endif + idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset .if \paranoid == 1 /* @@ -985,21 +1016,9 @@ ENTRY(\sym) * run in real process context if user_mode(regs). */ .Lfrom_usermode_switch_stack_\@: - call error_entry - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ + idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0 .endif - call \do_sym - - jmp error_exit - .endif _ASM_NOKPROBE(\sym) END(\sym) .endm @@ -1009,7 +1028,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1176,10 +1195,10 @@ idtentry xendebug do_debug has_error_code=0 #endif idtentry general_protection do_general_protection has_error_code=1 -idtentry page_fault do_page_fault has_error_code=1 +idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 #ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 +idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1 #endif #ifdef CONFIG_X86_MCE @@ -1286,20 +1305,11 @@ ENTRY(error_entry) movq %rax, %rsp /* switch stack */ ENCODE_FRAME_POINTER pushq %r12 - - /* - * We need to tell lockdep that IRQs are off. We can't do this until - * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). - */ - TRACE_IRQS_OFF - CALL_enter_from_user_mode ret .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY .Lerror_entry_done: - TRACE_IRQS_OFF ret /* @@ -1706,11 +1716,17 @@ nmi_restore: iretq END(nmi) +#ifndef CONFIG_IA32_EMULATION +/* + * This handles SYSCALL from 32-bit code. There is no way to program + * MSRs to fully disable 32-bit SYSCALL. + */ ENTRY(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret END(ignore_sysret) +#endif ENTRY(rewind_stack_do_exit) UNWIND_HINT_FUNC @@ -1719,7 +1735,7 @@ ENTRY(rewind_stack_do_exit) movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp - UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE + UNWIND_HINT_REGS call do_exit END(rewind_stack_do_exit) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index aa3336a7cb15..7d17b3addbbb 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,13 +10,11 @@ #ifdef CONFIG_IA32_EMULATION /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); - -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); - +#define __sys_ni_syscall __ia32_sys_ni_syscall #else /* CONFIG_IA32_EMULATION */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __sys_ni_syscall sys_ni_syscall #endif /* CONFIG_IA32_EMULATION */ #include <asm/syscalls_32.h> @@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_compat_max] = &sys_ni_syscall, + [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, #include <asm/syscalls_32.h> }; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index d5252bc1e380..adf619a856e8 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -4,22 +4,53 @@ #include <linux/linkage.h> #include <linux/sys.h> #include <linux/cache.h> +#include <linux/syscalls.h> #include <asm/asm-offsets.h> #include <asm/syscall.h> -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +extern asmlinkage long sys_ni_syscall(void); + +SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} + #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); +#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) #include <asm/syscalls_64.h> #undef __SYSCALL_64 +#undef __SYSCALL_X32 #define __SYSCALL_64(nr, sym, qual) [nr] = sym, +#define __SYSCALL_X32(nr, sym, qual) asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, + [0 ... __NR_syscall_max] = &__x64_sys_ni_syscall, +#include <asm/syscalls_64.h> +}; + +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#ifdef CONFIG_X86_X32_ABI + +#define __SYSCALL_64(nr, sym, qual) +#define __SYSCALL_X32(nr, sym, qual) [nr] = sym, + +asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { + /* + * Smells like a compiler bug -- it doesn't work + * when the & below is removed. + */ + [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall, #include <asm/syscalls_64.h> }; + +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#endif diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad968b7bac72..8e5a3cc5eba2 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -124,13 +124,13 @@ 110 i386 iopl sys_iopl __ia32_sys_iopl 111 i386 vhangup sys_vhangup __ia32_sys_vhangup 112 i386 idle -113 i386 vm86old sys_vm86old sys_ni_syscall +113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall 114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4 115 i386 swapoff sys_swapoff __ia32_sys_swapoff 116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo 117 i386 ipc sys_ipc __ia32_compat_sys_ipc 118 i386 fsync sys_fsync __ia32_sys_fsync -119 i386 sigreturn sys_sigreturn sys32_sigreturn +119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn 120 i386 clone sys_clone __ia32_compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname 122 i386 uname sys_newuname __ia32_sys_newuname @@ -177,20 +177,20 @@ 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 -166 i386 vm86 sys_vm86 sys_ni_syscall +166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall 167 i386 query_module 168 i386 poll sys_poll __ia32_sys_poll 169 i386 nfsservctl 170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16 171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16 172 i386 prctl sys_prctl __ia32_sys_prctl -173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn +173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction -175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask +175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo -179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend +179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread 181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite 182 i386 chown sys_chown16 __ia32_sys_chown16 diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh index 94fcd1951aca..1af2be39e7d9 100644 --- a/arch/x86/entry/syscalls/syscalltbl.sh +++ b/arch/x86/entry/syscalls/syscalltbl.sh @@ -1,13 +1,13 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 in="$1" out="$2" syscall_macro() { - abi="$1" - nr="$2" - entry="$3" + local abi="$1" + local nr="$2" + local entry="$3" # Entry can be either just a function name or "function/qualifier" real_entry="${entry%%/*}" @@ -21,14 +21,14 @@ syscall_macro() { } emit() { - abi="$1" - nr="$2" - entry="$3" - compat="$4" - umlentry="" + local abi="$1" + local nr="$2" + local entry="$3" + local compat="$4" + local umlentry="" - if [ "$abi" = "64" -a -n "$compat" ]; then - echo "a compat entry for a 64-bit syscall makes no sense" >&2 + if [ "$abi" != "I386" -a -n "$compat" ]; then + echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2 exit 1 fi @@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | ( while read nr abi name entry compat; do abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then - # COMMON is the same as 64, except that we don't expect X32 - # programs to use it. Our expectation has nothing to do with - # any generated code, so treat them the same. emit 64 "$nr" "$entry" "$compat" + if [ "$abi" = "COMMON" ]; then + # COMMON means that this syscall exists in the same form for + # 64-bit and X32. + echo "#ifdef CONFIG_X86_X32_ABI" + emit X32 "$nr" "$entry" "$compat" + echo "#endif" + fi elif [ "$abi" = "X32" ]; then - # X32 is equivalent to 64 on an X32-compatible kernel. echo "#ifdef CONFIG_X86_X32_ABI" - emit 64 "$nr" "$entry" "$compat" + emit X32 "$nr" "$entry" "$compat" echo "#endif" elif [ "$abi" = "I386" ]; then emit "$abi" "$nr" "$entry" "$compat" diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990e..ddff0ca6f509 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/kernel.h> #include <linux/mm_types.h> +#include <linux/elf.h> #include <asm/processor.h> #include <asm/vdso.h> diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e7d35f60d53f..3ea8056148d8 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -5,12 +5,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <asm/apicdef.h> #include <asm/nmi.h> #include "../perf_event.h" -static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); +static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); +static unsigned long perf_nmi_window; static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -244,6 +246,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, @@ -299,6 +302,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } +/* + * AMD64 events are detected based on their event codes. + */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + +static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) +{ + if (!(x86_pmu.flags & PMU_FL_PAIR)) + return false; + + switch (amd_get_event_code(hwc)) { + case 0x003: return true; /* Retired SSE/AVX FLOPs */ + default: return false; + } +} + static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -317,14 +339,6 @@ static int amd_core_hw_config(struct perf_event *event) return 0; } -/* - * AMD64 events are detected based on their event codes. - */ -static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) -{ - return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); -} - static inline int amd_is_nb_event(struct hw_perf_event *hwc) { return (hwc->config & 0xe0) == 0xe0; @@ -641,11 +655,12 @@ static void amd_pmu_disable_event(struct perf_event *event) * handler when multiple PMCs are active or PMC overflow while handling some * other source of an NMI. * - * Attempt to mitigate this by using the number of active PMCs to determine - * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset - * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the - * number of active PMCs or 2. The value of 2 is used in case an NMI does not - * arrive at the LAPIC in time to be collapsed into an already pending NMI. + * Attempt to mitigate this by creating an NMI window in which un-handled NMIs + * received during this window will be claimed. This prevents extending the + * window past when it is possible that latent NMIs should be received. The + * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has + * handled a counter. When an un-handled NMI is received, it will be claimed + * only if arriving within that window. */ static int amd_pmu_handle_irq(struct pt_regs *regs) { @@ -663,21 +678,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) handled = x86_pmu_handle_irq(regs); /* - * If a counter was handled, record the number of possible remaining - * NMIs that can occur. + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. */ if (handled) { - this_cpu_write(perf_nmi_counter, - min_t(unsigned int, 2, active)); + this_cpu_write(perf_nmi_tstamp, + jiffies + perf_nmi_window); return handled; } - if (!this_cpu_read(perf_nmi_counter)) + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) return NMI_DONE; - this_cpu_dec(perf_nmi_counter); - return NMI_HANDLED; } @@ -863,6 +876,20 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, } } +static struct event_constraint pair_constraint; + +static struct event_constraint * +amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (amd_is_pair_event_code(hwc)) + return &pair_constraint; + + return &unconstrained; +} + static ssize_t amd_event_sysfs_show(char *page, u64 config) { u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | @@ -906,29 +933,14 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { + u64 even_ctr_mask = 0ULL; + int i; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; - switch (boot_cpu_data.x86) { - case 0x15: - pr_cont("Fam15h "); - x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; - break; - case 0x17: - pr_cont("Fam17h "); - /* - * In family 17h, there are no event constraints in the PMC hardware. - * We fallback to using default amd_get_event_constraints. - */ - break; - case 0x18: - pr_cont("Fam18h "); - /* Using default amd_get_event_constraints. */ - break; - default: - pr_err("core perfctr but no constraints; unknown hardware!\n"); - return -ENODEV; - } + /* Avoid calculating the value each time in the NMI handler */ + perf_nmi_window = msecs_to_jiffies(100); /* * If core performance counter extensions exists, we must use @@ -944,6 +956,30 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.amd_nb_constraints = 0; + if (boot_cpu_data.x86 == 0x15) { + pr_cont("Fam15h "); + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + } + if (boot_cpu_data.x86 >= 0x17) { + pr_cont("Fam17h+ "); + /* + * Family 17h and compatibles have constraints for Large + * Increment per Cycle events: they may only be assigned an + * even numbered counter that has a consecutive adjacent odd + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + even_ctr_mask |= 1 << i; + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, + x86_pmu.num_counters / 2, 0, + PERF_X86_EVENT_PAIR); + + x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; + x86_pmu.flags |= PMU_FL_PAIR; + } + pr_cont("core perfctr, "); return 0; } diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 5b35b7ea5d72..26c36357c4c9 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { config &= ~perf_ibs->cnt_mask; - wrmsrl(hwc->config_base, config); + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); config &= ~perf_ibs->enable_mask; wrmsrl(hwc->config_base, config); } @@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = { }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, - .cnt_mask = IBS_OP_MAX_CNT, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, .max_period = IBS_OP_MAX_CNT << 4, @@ -614,7 +616,7 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) offset_max = perf_ibs->offset_max; else if (check_rip) - offset_max = 2; + offset_max = 3; else offset_max = 1; do { diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a6ea07f2aa84..4d867a752f0e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3cd94a21bd53..bfcb077503e6 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -375,7 +375,7 @@ int x86_add_exclusive(unsigned int what) * LBR and BTS are still mutually exclusive. */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) - return 0; + goto out; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); @@ -387,6 +387,7 @@ int x86_add_exclusive(unsigned int what) mutex_unlock(&pmc_reserve_mutex); } +out: atomic_inc(&active_events); return 0; @@ -397,11 +398,15 @@ fail_unlock: void x86_del_exclusive(unsigned int what) { + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); - atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 5ee3fed881d3..741540d849f3 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,9 +63,17 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * @@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2369ea1a1db7..512bf05751c6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1891,8 +1891,8 @@ static __initconst const u64 tnt_hw_cache_extra_regs static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1), EVENT_EXTRA_END }; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6601b8759c92..4e52c51c402e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1670,6 +1670,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 089bfcdf2f7f..c69ca14e8f49 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -486,10 +486,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags) local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); uncore_enable_event(box, event); - if (box->n_active == 1) { - uncore_enable_box(box); + if (box->n_active == 1) uncore_pmu_start_hrtimer(box); - } } void uncore_pmu_event_stop(struct perf_event *event, int flags) @@ -513,10 +511,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags) WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - if (box->n_active == 0) { - uncore_disable_box(box); + if (box->n_active == 0) uncore_pmu_cancel_hrtimer(box); - } } if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { @@ -762,6 +758,40 @@ static int uncore_pmu_event_init(struct perf_event *event) return ret; } +static void uncore_pmu_enable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->enable_box) + uncore_pmu->type->ops->enable_box(box); +} + +static void uncore_pmu_disable(struct pmu *pmu) +{ + struct intel_uncore_pmu *uncore_pmu; + struct intel_uncore_box *box; + + uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); + if (!uncore_pmu) + return; + + box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); + if (!box) + return; + + if (uncore_pmu->type->ops->disable_box) + uncore_pmu->type->ops->disable_box(box); +} + static ssize_t uncore_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { @@ -787,6 +817,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) pmu->pmu = (struct pmu) { .attr_groups = pmu->type->attr_groups, .task_ctx_nr = perf_invalid_context, + .pmu_enable = uncore_pmu_enable, + .pmu_disable = uncore_pmu_disable, .event_init = uncore_pmu_event_init, .add = uncore_pmu_event_add, .del = uncore_pmu_event_del, diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 28499e39679f..8a9c3fbb51c9 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -429,18 +429,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, return -EINVAL; } -static inline void uncore_disable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->disable_box) - box->pmu->type->ops->disable_box(box); -} - -static inline void uncore_enable_box(struct intel_uncore_box *box) -{ - if (box->pmu->type->ops->enable_box) - box->pmu->type->ops->enable_box(box); -} - static inline void uncore_disable_event(struct intel_uncore_box *box, struct perf_event *event) { diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f8431819b3e1..5c1da571a5aa 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,7 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_E3_IMC 0x1918 #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c #define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 @@ -662,6 +663,10 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, @@ -802,6 +807,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(SKL_E3_IMC, &skl_uncore_pci_driver), /* Xeon E3 V5 Gen Core processor */ IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4e346856ee19..134c89d815d7 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -76,6 +76,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ #define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ +#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -726,6 +727,7 @@ do { \ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */ +#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index dd92d8b438d4..18c3c2bb17b0 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -20,6 +20,7 @@ #include <linux/clockchips.h> #include <linux/hyperv.h> #include <linux/slab.h> +#include <linux/kernel.h> #include <linux/cpuhotplug.h> #ifdef CONFIG_HYPERV_TSCPAGE @@ -427,11 +428,14 @@ void hyperv_cleanup(void) } EXPORT_SYMBOL_GPL(hyperv_cleanup); -void hyperv_report_panic(struct pt_regs *regs, long err) +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { static bool panic_reported; u64 guest_id; + if (in_die && !panic_on_oops) + return; + /* * We prefer to report panic on 'die' chain as we have proper * registers to report, but if we miss it (e.g. on BUG()) we need diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 629d1ee05599..08ee9e6da915 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,6 +21,7 @@ #include <linux/personality.h> #include <linux/compat.h> #include <linux/binfmts.h> +#include <linux/syscalls.h> #include <asm/ucontext.h> #include <linux/uaccess.h> #include <asm/fpu/internal.h> @@ -118,7 +119,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, return err; } -asmlinkage long sys32_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = current_pt_regs(); struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); @@ -144,7 +145,7 @@ badframe: return 0; } -asmlinkage long sys32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_ia32 __user *frame; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2e599384abd8..84402de885b9 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -53,7 +53,7 @@ extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -448,6 +448,14 @@ static inline void ack_APIC_irq(void) apic_eoi(); } + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + + return !!(irr & (1U << (vector % 32))); +} + static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 31c379c1da41..0c814cd9ea42 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -9,6 +9,36 @@ #include <linux/mod_devicetable.h> +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) + +/** + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Backport version to keep the SRBDS pile consistant. No shorter variants + * required for this. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + /* * Match specific microcode revisions. * diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index cff3f3f3bfe0..6e9c9af3255a 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -78,8 +78,12 @@ struct cpu_entry_area { /* * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif struct entry_stack_page entry_stack_page; /* @@ -94,7 +98,6 @@ struct cpu_entry_area { */ struct cea_exception_stacks estacks; #endif -#ifdef CONFIG_CPU_SUP_INTEL /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. @@ -105,11 +108,13 @@ struct cpu_entry_area { * Reserve enough fixmap PTEs. */ struct debug_store_buffers cpu_debug_buffers; -#endif }; -#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +/* Total size includes the readonly IDT mapping page as well: */ +#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); @@ -117,13 +122,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); +/* Single page reserved for the readonly IDT mapping: */ #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) #define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) extern struct cpu_entry_area *get_cpu_entry_area(int cpu); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6086e00f4b97..463395d7a306 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -351,6 +351,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ @@ -395,5 +396,6 @@ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index 0acf5ee45a21..ef5638f641f2 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 00f7cf45e699..8e95aa4b0d17 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -74,7 +74,7 @@ #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) #ifdef CONFIG_X86_32 /* The maximum address that we can perform a DMA transfer to on this platform */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 9da8cccdf3fb..6a295acd3de6 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -157,7 +157,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4c95c365058a..00eac7f1529b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) { - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; + return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } /* @@ -619,6 +619,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * MXCSR and XCR definitions: */ +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + extern unsigned int mxcsr_feature_mask; #define XCR_XFEATURE_ENABLED_MASK 0x00000000 diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 5cbce6fbb534..296b346184b2 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -22,6 +22,35 @@ pop %_ASM_BP .endm +#ifdef CONFIG_X86_64 +/* + * This is a sneaky trick to help the unwinder find pt_regs on the stack. The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just setting the LSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts + * the original rbp. + */ +.macro ENCODE_FRAME_POINTER ptregs_offset=0 + leaq 1+\ptregs_offset(%rsp), %rbp +.endm +#else /* !CONFIG_X86_64 */ +/* + * This is a sneaky trick to help the unwinder find pt_regs on the stack. The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just clearing the MSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the + * original ebp. + */ +.macro ENCODE_FRAME_POINTER + mov %esp, %ebp + andl $0x7fffffff, %ebp +.endm +#endif /* CONFIG_X86_64 */ + #else /* !__ASSEMBLY__ */ #define FRAME_BEGIN \ @@ -30,12 +59,32 @@ #define FRAME_END "pop %" _ASM_BP "\n" +#ifdef CONFIG_X86_64 +#define ENCODE_FRAME_POINTER \ + "lea 1(%rsp), %rbp\n\t" +#else /* !CONFIG_X86_64 */ +#define ENCODE_FRAME_POINTER \ + "movl %esp, %ebp\n\t" \ + "andl $0x7fffffff, %ebp\n\t" +#endif /* CONFIG_X86_64 */ + #endif /* __ASSEMBLY__ */ #define FRAME_OFFSET __ASM_SEL(4, 8) #else /* !CONFIG_FRAME_POINTER */ +#ifdef __ASSEMBLY__ + +.macro ENCODE_FRAME_POINTER ptregs_offset=0 +.endm + +#else /* !__ASSEMBLY */ + +#define ENCODE_FRAME_POINTER + +#endif + #define FRAME_BEGIN #define FRAME_END #define FRAME_OFFSET 0 diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 82a57d344b9b..2a8e5f78e23c 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -61,6 +61,9 @@ #define INTEL_FAM6_TIGERLAKE_L 0x8C #define INTEL_FAM6_TIGERLAKE 0x8D +#define INTEL_FAM6_COMETLAKE 0xA5 +#define INTEL_FAM6_COMETLAKE_L 0xA6 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 003f2daa3b0f..5e7d6b46de97 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -71,22 +71,6 @@ struct kimage; #define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ /* - * CPU does not save ss and sp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static inline void crash_fixup_ss_esp(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ -#ifdef CONFIG_X86_32 - newregs->sp = (unsigned long)&(oldregs->sp); - asm volatile("xorl %%eax, %%eax\n\t" - "movw %%ss, %%ax\n\t" - :"=a"(newregs->ss)); -#endif -} - -/* * This function is responsible for capturing register states if coming * via panic otherwise just fix up the ss and sp if coming via kernel * mode exception. @@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, { if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); - crash_fixup_ss_esp(newregs, oldregs); } else { #ifdef CONFIG_X86_32 asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4512200c33d2..c58b42c1011c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -385,12 +385,12 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, + u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, @@ -555,6 +555,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; @@ -570,6 +571,7 @@ struct kvm_vcpu_arch { u64 smbase; u64 smi_count; bool tpr_access_reporting; + bool xsaves_enabled; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; @@ -620,7 +622,7 @@ struct kvm_vcpu_arch { * "guest_fpu" state here contains the guest FPU context, with the * host PRKU bits. */ - struct fpu user_fpu; + struct fpu *user_fpu; struct fpu *guest_fpu; u64 xcr0; @@ -672,10 +674,10 @@ struct kvm_vcpu_arch { bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_pfn_cache cache; } st; u64 tsc_offset; @@ -1094,7 +1096,7 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); @@ -1124,8 +1126,9 @@ struct kvm_x86_ops { bool (*xsaves_supported)(void); bool (*umip_emulated)(void); bool (*pt_supported)(void); + bool (*pku_supported)(void); - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1154,7 +1157,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; @@ -1417,7 +1420,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); @@ -1585,8 +1588,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ - return (irq->delivery_mode == dest_Fixed || - irq->delivery_mode == dest_LowestPrio); + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed3cf1c3934..9b4df6eaa11a 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code); +void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566..5c524d4f71cd 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5ff3e8af2c20..e78c7db87801 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -59,6 +59,7 @@ typedef struct { #define INIT_MM_CONTEXT(mm) \ .context = { \ .ctx_id = 1, \ + .lock = __MUTEX_INITIALIZER(mm.context.lock), \ } void leave_mm(int cpu); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..ae44aa1f8a80 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -343,7 +343,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyperv_report_panic(struct pt_regs *regs, long err); +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); void hyperv_cleanup(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bae74871c017..e1c4bf0fa66c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -110,6 +110,10 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -493,6 +497,8 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..9d5d949e662e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -41,7 +41,6 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d6f5ae2c79ab..dce26f1d13e1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x) static inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr2); + return PVOP_CALLEE0(unsigned long, mmu.read_cr2); } static inline void write_cr2(unsigned long x) @@ -910,13 +910,7 @@ extern void default_banner(void); ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ ) -#endif - -#define GET_CR2_INTO_RAX \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); -#ifdef CONFIG_PARAVIRT_XXL #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ ANNOTATE_RETPOLINE_SAFE; \ @@ -930,9 +924,19 @@ extern void default_banner(void); call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif -#endif +#endif /* CONFIG_PARAVIRT_XXL */ +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_PARAVIRT_XXL + +#define GET_CR2_INTO_AX \ + PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \ + ) + +#endif /* CONFIG_PARAVIRT_XXL */ -#endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 2474e434a6f7..8e46f1b764a3 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -220,7 +220,7 @@ struct pv_mmu_ops { void (*exit_mmap)(struct mm_struct *mm); #ifdef CONFIG_PARAVIRT_XXL - unsigned long (*read_cr2)(void); + struct paravirt_callee_save read_cr2; void (*write_cr2)(unsigned long); unsigned long (*read_cr3)(void); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5e0509b41986..c8e859cdead7 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -253,6 +253,7 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; @@ -624,12 +625,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return __pmd(val); } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 4fe9e7fc74d3..c78da8eda8f2 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -106,6 +106,6 @@ do { \ * with only a host target support using a 32-bit type for internal * representation. */ -#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) +#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..1636eb8e5a5b 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * to avoid include recursion hell */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 39) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) #define LDT_BASE_ADDR \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d6ff0bbdb394..9484b06291ec 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7781114574f0..f9300c86d95d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -331,7 +331,7 @@ struct x86_hw_tss { #define INVALID_IO_BITMAP_OFFSET 0x8000 struct entry_stack { - unsigned long words[64]; + char stack[PAGE_SIZE]; }; struct entry_stack_page { @@ -738,6 +738,7 @@ extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void load_percpu_segment(int); extern void cpu_init(void); +extern void cr4_init(void); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 8a7fc0cca2d1..3703c91f441e 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #define compat_user_stack_pointer() current_pt_regs()->sp #endif -#ifdef CONFIG_X86_32 -extern unsigned long kernel_stack_pointer(struct pt_regs *regs); -#else static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } -#endif #define GET_IP(regs) ((regs)->ip) #define GET_FP(regs) ((regs)->bp) @@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 - /* - * Traps from the kernel do not save sp and ss. - * Use the helper function to retrieve sp. - */ - if (offset == offsetof(struct pt_regs, sp) && - regs->cs == __KERNEL_CS) - return kernel_stack_pointer(regs); - /* The selector fields are 16-bit. */ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || @@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { - return ((addr & ~(THREAD_SIZE - 1)) == - (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); + return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** @@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..6669164abadc 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -31,6 +31,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ae7b909dc242..75d298cbc780 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -93,28 +93,35 @@ void set_kernel_text_rw(void); void set_kernel_text_ro(void); #ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). + */ +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { unsigned long decoy_addr; int rc; /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() + * This relies on set_memory_XX() properly sanitizing any __pa() * results with __PHYSICAL_MASK or PTE_PFN_MASK. */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - rc = set_memory_uc(decoy_addr, 1); + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 27c47d183f4b..8b58d6975d5d 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void) { unsigned long flags; - asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC, - X86_FEATURE_SMAP) + asm volatile ("# smap_save\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "pushf; pop %0; " __ASM_CLAC "\n\t" + "1:" : "=rm" (flags) : : "memory", "cc"); return flags; @@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { - asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) + asm volatile ("# smap_restore\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "push %0; popf\n\t" + "1:" : : "g" (flags) : "memory", "cc"); } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 0a3c4cab39db..219be88a59d2 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -6,6 +6,8 @@ #ifdef __KERNEL__ #include <asm/nops.h> +#include <asm/processor-flags.h> +#include <linux/jump_label.h> /* * Volatile isn't enough to prevent the compiler from reordering the @@ -16,6 +18,8 @@ */ extern unsigned long __force_order; +void native_write_cr0(unsigned long val); + static inline unsigned long native_read_cr0(void) { unsigned long val; @@ -23,11 +27,6 @@ static inline unsigned long native_read_cr0(void) return val; } -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); -} - static inline unsigned long native_read_cr2(void) { unsigned long val; @@ -72,10 +71,7 @@ static inline unsigned long native_read_cr4(void) return val; } -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); -} +void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_64 static inline unsigned long native_read_cr8(void) diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 91e29b6a86a5..9804a7957f4e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -55,8 +55,13 @@ /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index a8d0cdf48616..14db05086bbf 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -78,7 +78,7 @@ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) - return (unsigned long *)kernel_stack_pointer(regs); + return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2dc4a021beea..8db3fdb6102e 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[]; extern const sys_call_ptr_t ia32_sys_call_table[]; #endif +#ifdef CONFIG_X86_X32_ABI +extern const sys_call_ptr_t x32_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d..e2389ce9bf58 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,8 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +struct pt_regs; + /* Mapping of registers to parameters for syscalls on x86-64 and x32 */ #define SC_X86_64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ @@ -28,13 +30,21 @@ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this * case as well. */ +#define __IA32_COMPAT_SYS_STUB0(x, name) \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name(); \ + } + #define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #define __IA32_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ @@ -48,16 +58,23 @@ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias * named __ia32_sys_*() */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) -#define COND_SYSCALL(name) \ - cond_syscall(__x64_sys_##name); \ - cond_syscall(__ia32_sys_##name) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } \ + asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\ + { \ + return sys_ni_syscall(); \ + } #define SYS_NI(name) \ SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ @@ -75,15 +92,24 @@ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ +#define __X32_COMPAT_SYS_STUB0(x, name, ...) \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name();\ + } + #define __X32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #else /* CONFIG_X86_X32 */ +#define __X32_COMPAT_SYS_STUB0(x, name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #endif /* CONFIG_X86_X32 */ @@ -94,6 +120,17 @@ * mapping of registers to parameters, we need to generate stubs for each * of them. */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long __se_compat_sys_##name(void); \ + static inline long __do_compat_sys_##name(void); \ + __IA32_COMPAT_SYS_STUB0(x, name) \ + __X32_COMPAT_SYS_STUB0(x, name) \ + static long __se_compat_sys_##name(void) \ + { \ + return __do_compat_sys_##name(); \ + } \ + static inline long __do_compat_sys_##name(void) + #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -181,15 +218,19 @@ * macros to work correctly. */ #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } #endif #ifndef SYS_NI @@ -201,7 +242,6 @@ * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -struct pt_regs; asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); asmlinkage long __x64_sys_time(const struct pt_regs *regs); diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 880b5515b1d6..ebf185fa30bc 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -51,7 +51,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) #define INT3_INSN_SIZE 1 #define CALL_INSN_SIZE 5 -#ifdef CONFIG_X86_64 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* @@ -69,7 +68,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); } -#endif /* CONFIG_X86_64 */ #endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f2bd284abc16..b25e633033c3 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_64 -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code); +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address); asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); void __init trap_init(void); #endif dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); -dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); +dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 146859efd83c..b923b8d333d4 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -5,12 +5,6 @@ #include <uapi/asm/unistd.h> -# ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) -# else -# define __SYSCALL_MASK (~0) -# endif - # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 499578f7e6d7..70fc159ebe69 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -19,7 +19,7 @@ struct unwind_state { #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; - struct pt_regs *regs; + struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..be5e2e747f50 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -2,7 +2,14 @@ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H -/* x32 syscall flag bit */ +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ #define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 17b33ef604f3..ea019ba02942 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1740,7 +1740,7 @@ int __acpi_acquire_global_lock(unsigned int *lock) new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); val = cmpxchg(lock, old, new); } while (unlikely (val != old)); - return (new < 3) ? -1 : 0; + return ((new & 0x3) < 3) ? -1 : 0; } int __acpi_release_global_lock(unsigned int *lock) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index a5e5484988fd..a5d650a15d55 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -146,7 +146,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx, + false); if (retval == 0) { /* Use the hint in CST */ percpu_entry->states[cx->index].eax = cx->address; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 390596b761e3..16f36f60a394 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -615,11 +615,98 @@ extern struct paravirt_patch_site __start_parainstructions[], __stop_parainstructions[]; #endif /* CONFIG_PARAVIRT */ +/* + * Self-test for the INT3 based CALL emulation code. + * + * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up + * properly and that there is a stack gap between the INT3 frame and the + * previous context. Without this gap doing a virtual PUSH on the interrupted + * stack would corrupt the INT3 IRET frame. + * + * See entry_{32,64}.S for more details. + */ + +/* + * We define the int3_magic() function in assembly to control the calling + * convention such that we can 'call' it from assembly. + */ + +extern void int3_magic(unsigned int *ptr); /* defined in asm */ + +asm ( +" .pushsection .init.text, \"ax\", @progbits\n" +" .type int3_magic, @function\n" +"int3_magic:\n" +" movl $1, (%" _ASM_ARG1 ")\n" +" ret\n" +" .size int3_magic, .-int3_magic\n" +" .popsection\n" +); + +extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ + +static int __init +int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!regs || user_mode(regs)) + return NOTIFY_DONE; + + if (val != DIE_INT3) + return NOTIFY_DONE; + + if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + return NOTIFY_DONE; + + int3_emulate_call(regs, (unsigned long)&int3_magic); + return NOTIFY_STOP; +} + +static void __init int3_selftest(void) +{ + static __initdata struct notifier_block int3_exception_nb = { + .notifier_call = int3_exception_notify, + .priority = INT_MAX-1, /* last */ + }; + unsigned int val = 0; + + BUG_ON(register_die_notifier(&int3_exception_nb)); + + /* + * Basically: int3_magic(&val); but really complicated :-) + * + * Stick the address of the INT3 instruction into int3_selftest_ip, + * then trigger the INT3, padded with NOPs to match a CALL instruction + * length. + */ + asm volatile ("1: int3; nop; nop; nop; nop\n\t" + ".pushsection .init.data,\"aw\"\n\t" + ".align " __ASM_SEL(4, 8) "\n\t" + ".type int3_selftest_ip, @object\n\t" + ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" + "int3_selftest_ip:\n\t" + __ASM_SEL(.long, .quad) " 1b\n\t" + ".popsection\n\t" + : ASM_CALL_CONSTRAINT + : __ASM_SEL_RAW(a, D) (&val) + : "memory"); + + BUG_ON(val != 1); + + unregister_die_notifier(&int3_exception_nb); +} + void __init alternative_instructions(void) { - /* The patching is not fully atomic, so try to avoid local interruptions - that might execute the to be patched code. - Other CPUs are not running. */ + int3_selftest(); + + /* + * The patching is not fully atomic, so try to avoid local + * interruptions that might execute the to be patched code. + * Other CPUs are not running. + */ stop_nmi(); /* @@ -644,10 +731,11 @@ void __init alternative_instructions(void) _text, _etext); } - if (!uniproc_patched || num_possible_cpus() == 1) + if (!uniproc_patched || num_possible_cpus() == 1) { free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); + } #endif apply_paravirt(__parainstructions, __parainstructions_end); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 8c26b696d893..8e283685f471 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -18,10 +18,13 @@ #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 +#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 +#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -32,6 +35,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, {} }; @@ -50,8 +54,10 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -65,7 +71,9 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a18d6dd934e5..8dadfd73c5ad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -195,7 +195,7 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; -unsigned int lapic_timer_frequency = 0; +unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); @@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * According to Intel, MFENCE can do the serialization here. */ asm volatile("mfence" : : : "memory"); - - printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -501,7 +499,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) if (evt->features & CLOCK_EVT_FEAT_DUMMY) return 0; - __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); + __setup_APIC_LVTT(lapic_timer_period, oneshot, 1); return 0; } @@ -552,7 +550,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); #define DEADLINE_MODEL_MATCH_REV(model, rev) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } -static u32 hsx_deadline_rev(void) +static __init u32 hsx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ @@ -562,7 +560,7 @@ static u32 hsx_deadline_rev(void) return ~0U; } -static u32 bdx_deadline_rev(void) +static __init u32 bdx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; @@ -574,7 +572,7 @@ static u32 bdx_deadline_rev(void) return ~0U; } -static u32 skx_deadline_rev(void) +static __init u32 skx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; @@ -587,7 +585,7 @@ static u32 skx_deadline_rev(void) return ~0U; } -static const struct x86_cpu_id deadline_match[] = { +static const struct x86_cpu_id deadline_match[] __initconst = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), @@ -609,18 +607,19 @@ static const struct x86_cpu_id deadline_match[] = { {}, }; -static void apic_check_deadline_errata(void) +static __init bool apic_validate_deadline_timer(void) { const struct x86_cpu_id *m; u32 rev; - if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || - boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return; + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; m = x86_match_cpu(deadline_match); if (!m) - return; + return true; /* * Function pointers will have the MSB set due to address layout, @@ -632,11 +631,12 @@ static void apic_check_deadline_errata(void) rev = (u32)m->driver_data; if (boot_cpu_data.microcode >= rev) - return; + return true; setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " "please update microcode to version: 0x%x (or later)\n", rev); + return false; } /* @@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init lapic_init_clockevent(void) { - if (!lapic_timer_frequency) + if (!lapic_timer_period) return -1; /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, + lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR, TICK_NSEC, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); @@ -840,7 +840,7 @@ static int __init calibrate_APIC_clock(void) */ if (!lapic_init_clockevent()) { apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", - lapic_timer_frequency); + lapic_timer_period); /* * Direct calibration methods must have an always running * local APIC timer, no need for broadcast timer. @@ -921,13 +921,13 @@ static int __init calibrate_APIC_clock(void) pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, &delta, &deltatsc); - lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; lapic_init_clockevent(); apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - lapic_timer_frequency); + lapic_timer_period); if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " @@ -938,13 +938,13 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... host bus clock speed is " "%u.%04u MHz.\n", - lapic_timer_frequency / (1000000 / HZ), - lapic_timer_frequency % (1000000 / HZ)); + lapic_timer_period / (1000000 / HZ), + lapic_timer_period % (1000000 / HZ)); /* * Do a sanity check on the APIC calibration result */ - if (lapic_timer_frequency < (1000000 / HZ)) { + if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); pr_warning("APIC frequency too slow, disabling apic timer\n"); return -1; @@ -1540,9 +1540,6 @@ static void setup_local_APIC(void) { int cpu = smp_processor_id(); unsigned int value; -#ifdef CONFIG_X86_32 - int logical_apicid, ldr_apicid; -#endif if (disable_apic) { @@ -1583,16 +1580,21 @@ static void setup_local_APIC(void) apic->init_apic_ldr(); #ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } #endif /* @@ -2035,7 +2037,8 @@ void __init init_apic_mappings(void) { unsigned int new_apicid; - apic_check_deadline_errata(); + if (apic_validate_deadline_timer()) + pr_info("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e8c6466ef65e..70718977ce7a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1726,9 +1726,10 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) static inline bool ioapic_irqd_mask(struct irq_data *data) { - /* If we are moving the irq we need to mask it */ + /* If we are moving the IRQ we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic_irq(data); + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); return true; } return false; @@ -1765,7 +1766,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) */ if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic_irq(data); + /* If the IRQ is masked in the core, leave it: */ + if (!irqd_irq_masked(data)) + unmask_ioapic_irq(data); } } #else diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index dad0dd759de2..e0c3c57ed7c0 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -23,10 +23,8 @@ static struct irq_domain *msi_default_domain; -static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); - msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) @@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) MSI_DATA_VECTOR(cfg->vector); } +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg); +} + +static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) +{ + struct msi_msg msg[2] = { [1] = { }, }; + + __irq_msi_compose_msg(cfg, msg); + irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); +} + +static int +msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) +{ + struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); + struct irq_data *parent = irqd->parent_data; + unsigned int cpu; + int ret; + + /* Save the current configuration */ + cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); + old_cfg = *cfg; + + /* Allocate a new target vector */ + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + /* + * For non-maskable and non-remapped MSI interrupts the migration + * to a different destination CPU and a different vector has to be + * done careful to handle the possible stray interrupt which can be + * caused by the non-atomic update of the address/data pair. + * + * Direct update is possible when: + * - The MSI is maskable (remapped MSI does not use this code path)). + * The quirk bit is not set in this case. + * - The new vector is the same as the old vector + * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The new destination CPU is the same as the old destination CPU + */ + if (!irqd_msi_nomask_quirk(irqd) || + cfg->vector == old_cfg.vector || + old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + cfg->dest_apicid == old_cfg.dest_apicid) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Paranoia: Validate that the interrupt target is the local + * CPU. + */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Redirect the interrupt to the new vector on the current CPU + * first. This might cause a spurious interrupt on this vector if + * the device raises an interrupt right between this update and the + * update to the final destination CPU. + * + * If the vector is in use then the installed device handler will + * denote it as spurious which is no harm as this is a rare event + * and interrupt handlers have to cope with spurious interrupts + * anyway. If the vector is unused, then it is marked so it won't + * trigger the 'No irq handler for vector' warning in do_IRQ(). + * + * This requires to hold vector lock to prevent concurrent updates to + * the affected vector. + */ + lock_vector_lock(); + + /* + * Mark the new target vector on the local CPU if it is currently + * unused. Reuse the VECTOR_RETRIGGERED state which is also used in + * the CPU hotplug path for a similar purpose. This cannot be + * undone here as the current CPU has interrupts disabled and + * cannot handle the interrupt before the whole set_affinity() + * section is done. In the CPU unplug case, the current CPU is + * about to vanish and will not handle any interrupts anymore. The + * vector is cleaned up when the CPU comes online again. + */ + if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) + this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); + + /* Redirect it to the new vector on the local CPU temporarily */ + old_cfg.vector = cfg->vector; + irq_msi_update_msg(irqd, &old_cfg); + + /* Now transition it to the target CPU */ + irq_msi_update_msg(irqd, cfg); + + /* + * All interrupts after this point are now targeted at the new + * vector/CPU. + * + * Drop vector lock before testing whether the temporary assignment + * to the local CPU was hit by an interrupt raised in the device, + * because the retrigger function acquires vector lock again. + */ + unlock_vector_lock(); + + /* + * Check whether the transition raced with a device interrupt and + * is pending in the local APICs IRR. It is safe to do this outside + * of vector lock as the irq_desc::lock of this interrupt is still + * held and interrupts are disabled: The check is not accessing the + * underlying vector store. It's just checking the local APIC's + * IRR. + */ + if (lapic_vector_set_in_irr(cfg->vector)) + irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); + + return ret; +} + /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. @@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_set_affinity = msi_set_affinity, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent) } if (!msi_default_domain) pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); + else + msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; } #ifdef CONFIG_IRQ_REMAP diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 2c5676b0a6e7..259d17d08983 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, trace_vector_activate(irqd->irq, apicd->is_managed, apicd->can_reserve, reserve); - /* Nothing to do for fixed assigned vectors */ - if (!apicd->can_reserve && !apicd->is_managed) - return 0; - raw_spin_lock_irqsave(&vector_lock, flags); - if (reserve || irqd_is_managed_and_shutdown(irqd)) + if (!apicd->can_reserve && !apicd->is_managed) + assign_irq_vector_any_locked(irqd); + else if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -557,6 +555,16 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irqd->hwirq = virq + i; irqd_set_single_target(irqd); /* + * Prevent that any of these interrupts is invoked in + * non interrupt context via e.g. generic_handle_irq() + * as that can corrupt the affinity move state. + */ + irqd_set_handle_enforce_irqctx(irqd); + + /* Don't invoke affinity setter on deactivated interrupts */ + irqd_set_affinity_on_activate(irqd); + + /* * Legacy vectors are already assigned when the IOAPIC * takes them over. They stay on the same vector. This is * required for check_timer() to work correctly as it might @@ -769,20 +777,10 @@ void lapic_offline(void) static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { - struct apic_chip_data *apicd = apic_chip_data(irqd); int err; - /* - * Core code can call here for inactive interrupts. For inactive - * interrupts which use managed or reservation mode there is no - * point in going through the vector assignment right now as the - * activation will assign a vector which fits the destination - * cpumask. Let the core code store the destination mask and be - * done with it. - */ - if (!irqd_is_activated(irqd) && - (apicd->is_managed || apicd->can_reserve)) - return IRQ_SET_MASK_OK; + if (WARN_ON_ONCE(!irqd_is_activated(irqd))) + return -EIO; raw_spin_lock(&vector_lock); cpumask_and(vector_searchmask, dest, cpu_online_mask); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 168543d077d7..5c7ee3df4d0b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -38,7 +38,6 @@ static void __used common(void) #endif BLANK(); - OFFSET(TASK_TI_flags, task_struct, thread_info.flags); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); @@ -77,6 +76,7 @@ static void __used common(void) BLANK(); OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); + OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2); #endif BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d3d075226c0a..70e97727a26a 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -6,13 +6,28 @@ #include <asm/ia32.h> #define __SYSCALL_64(nr, sym, qual) [nr] = 1, +#define __SYSCALL_X32(nr, sym, qual) static char syscalls_64[] = { #include <asm/syscalls_64.h> }; +#undef __SYSCALL_64 +#undef __SYSCALL_X32 + +#ifdef CONFIG_X86_X32_ABI +#define __SYSCALL_64(nr, sym, qual) +#define __SYSCALL_X32(nr, sym, qual) [nr] = 1, +static char syscalls_x32[] = { +#include <asm/syscalls_64.h> +}; +#undef __SYSCALL_64 +#undef __SYSCALL_X32 +#endif + #define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls_ia32[] = { #include <asm/syscalls_32.h> }; +#undef __SYSCALL_I386 #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) #include <asm/kvm_para.h> @@ -80,6 +95,11 @@ int main(void) DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); DEFINE(NR_syscalls, sizeof(syscalls_64)); +#ifdef CONFIG_X86_X32_ABI + DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1); + DEFINE(X32_NR_syscalls, sizeof(syscalls_x32)); +#endif + DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1); DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32)); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 68c363c341bf..fe3649f725dc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -27,6 +27,7 @@ static const int amd_erratum_383[]; static const int amd_erratum_400[]; +static const int amd_erratum_1054[]; static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); /* @@ -614,9 +615,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) return; clear_all: - clear_cpu_cap(c, X86_FEATURE_SME); + setup_clear_cpu_cap(X86_FEATURE_SME); clear_sev: - clear_cpu_cap(c, X86_FEATURE_SEV); + setup_clear_cpu_cap(X86_FEATURE_SEV); } } @@ -988,6 +989,15 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + /* + * Turn on the Instructions Retired free counter on machines not + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (cpu_has(c, X86_FEATURE_IRPERF) && + !cpu_has_amd_erratum(c, amd_erratum_1054)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); } #ifdef CONFIG_X86_32 @@ -1115,6 +1125,9 @@ static const int amd_erratum_400[] = static const int amd_erratum_383[] = AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); +/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +static const int amd_erratum_1054[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 63f576836251..a277fa17c4ee 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -39,7 +39,9 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); +static void __init srbds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -107,6 +109,13 @@ void __init check_bugs(void) l1tf_select_mitigation(); mds_select_mitigation(); taa_select_mitigation(); + srbds_select_mitigation(); + + /* + * As MDS and TAA mitigations are inter-related, print MDS + * mitigation until after TAA mitigation selection is done. + */ + mds_print_mitigation(); arch_smt_update(); @@ -245,6 +254,12 @@ static void __init mds_select_mitigation(void) (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } +} + +static void __init mds_print_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) + return; pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -304,8 +319,12 @@ static void __init taa_select_mitigation(void) return; } - /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ - if (taa_mitigation == TAA_MITIGATION_OFF) + /* + * TAA mitigation via VERW is turned off if both + * tsx_async_abort=off and mds=off are specified. + */ + if (taa_mitigation == TAA_MITIGATION_OFF && + mds_mitigation == MDS_MITIGATION_OFF) goto out; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) @@ -339,6 +358,15 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); + /* + * Update MDS mitigation, if necessary, as the mds_user_clear is + * now enabled for TAA mitigation. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } out: pr_info("%s\n", taa_strings[taa_mitigation]); } @@ -365,6 +393,97 @@ static int __init tsx_async_abort_parse_cmdline(char *str) early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "SRBDS: " fmt + +enum srbds_mitigations { + SRBDS_MITIGATION_OFF, + SRBDS_MITIGATION_UCODE_NEEDED, + SRBDS_MITIGATION_FULL, + SRBDS_MITIGATION_TSX_OFF, + SRBDS_MITIGATION_HYPERVISOR, +}; + +static enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL; + +static const char * const srbds_strings[] = { + [SRBDS_MITIGATION_OFF] = "Vulnerable", + [SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [SRBDS_MITIGATION_FULL] = "Mitigation: Microcode", + [SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled", + [SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", +}; + +static bool srbds_off; + +void update_srbds_msr(void) +{ + u64 mcu_ctrl; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; + + if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + + switch (srbds_mitigation) { + case SRBDS_MITIGATION_OFF: + case SRBDS_MITIGATION_TSX_OFF: + mcu_ctrl |= RNGDS_MITG_DIS; + break; + case SRBDS_MITIGATION_FULL: + mcu_ctrl &= ~RNGDS_MITG_DIS; + break; + default: + break; + } + + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); +} + +static void __init srbds_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + /* + * Check to see if this is one of the MDS_NO systems supporting + * TSX that are only exposed to SRBDS when TSX is enabled. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; + else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; + else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) + srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED; + else if (cpu_mitigations_off() || srbds_off) + srbds_mitigation = SRBDS_MITIGATION_OFF; + + update_srbds_msr(); + pr_info("%s\n", srbds_strings[srbds_mitigation]); +} + +static int __init srbds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return 0; + + srbds_off = !strcmp(str, "off"); + return 0; +} +early_param("srbds", srbds_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -462,7 +581,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline); static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = +static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; #ifdef CONFIG_RETPOLINE @@ -608,15 +729,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } - /* - * At this point, an STIBP mode other than "off" has been set. - * If STIBP support is not being forced, check if STIBP always-on - * is preferred. - */ - if (mode != SPECTRE_V2_USER_STRICT && - boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) - mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); @@ -639,23 +751,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); + + spectre_v2_user_ibpb = mode; } - /* If enhanced IBRS is enabled no STIBP required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * required. + */ + if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIBP - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + /* + * If STIBP is not available, clear the STIBP mode. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP)) mode = SPECTRE_V2_USER_NONE; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { @@ -888,7 +1013,7 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: @@ -1131,14 +1256,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; /* * Indirect branch speculation is always disabled in strict - * mode. + * mode. It can neither be enabled if it was force-disabled + * by a previous prctl call. + */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + task_spec_ib_force_disable(task)) return -EPERM; task_clear_spec_ib_disable(task); task_update_spec_tif(task); @@ -1149,10 +1279,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. */ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return 0; task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) @@ -1183,7 +1315,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1214,22 +1347,24 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - return PR_SPEC_DISABLE; - default: + } else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1468,7 +1603,7 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: @@ -1495,6 +1630,11 @@ static char *ibpb_state(void) return ""; } +static ssize_t srbds_show_state(char *buf) +{ + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1539,6 +1679,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITLB_MULTIHIT: return itlb_multihit_show_state(buf); + case X86_BUG_SRBDS: + return srbds_show_state(buf); + default: break; } @@ -1585,4 +1728,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr { return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); } + +ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8253038fa85f..1c44715818e6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -366,6 +366,77 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +/* These bits should not change their value after CPU init is finished. */ +static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; +static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +static unsigned long cr4_pinned_bits __ro_after_init; + +void native_write_cr0(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } +} +EXPORT_SYMBOL(native_write_cr0); + +void native_write_cr4(unsigned long val) +{ + unsigned long bits_changed = 0; + +set_register: + asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; + goto set_register; + } + /* Warn after we've corrected the changed bits. */ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); + } +} +EXPORT_SYMBOL(native_write_cr4); + +void cr4_init(void) +{ + unsigned long cr4 = __read_cr4(); + + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; + + __write_cr4(cr4); + + /* Initialize cr4 shadow for this CPU. */ + this_cpu_write(cpu_tlbstate.cr4, cr4); +} + +/* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and + * enable CR pinning. + */ +static void __init setup_cr_pinning(void) +{ + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; + static_key_enable(&cr_pinning.key); +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -393,7 +464,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -1010,9 +1081,30 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; -static bool __init cpu_matches(unsigned long which) +#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + +#define SRBDS BIT(0) + +static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS), + {} +}; + +static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) { - const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); + const struct x86_cpu_id *m = x86_match_cpu(table); return m && !!(m->driver_data & which); } @@ -1032,29 +1124,32 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) u64 ia32_cap = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ - if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && + !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); - if (cpu_matches(NO_SPECULATION)) + if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && + !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && + !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); - if (cpu_matches(MSBDS_ONLY)) + if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); } - if (!cpu_matches(NO_SWAPGS)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); /* @@ -1072,7 +1167,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); - if (cpu_matches(NO_MELTDOWN)) + /* + * SRBDS affects CPUs which support RDRAND or RDSEED and are listed + * in the vulnerability blacklist. + */ + if ((cpu_has(c, X86_FEATURE_RDRAND) || + cpu_has(c, X86_FEATURE_RDSEED)) && + cpu_matches(cpu_vuln_blacklist, SRBDS)) + setup_force_cpu_bug(X86_BUG_SRBDS); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1081,7 +1185,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (cpu_matches(NO_L1TF)) + if (cpu_matches(cpu_vuln_whitelist, NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); @@ -1505,6 +1609,7 @@ void __init identify_boot_cpu(void) cpu_detect_tlb(&boot_cpu_data); tsx_init(); + setup_cr_pinning(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1517,6 +1622,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); + update_srbds_msr(); } static __init int setup_noclflush(char *arg) @@ -1739,12 +1845,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - if (cpu) load_ucode_ap(); @@ -1839,12 +1939,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - show_ucode_info_early(); pr_info("Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 38ab6e115eac..9d033693519a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -77,6 +77,7 @@ extern void detect_ht(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 92b107ab283f..c979a056a46f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Processors which have self-snooping capability can handle conflicting + * memory type across CPUs by snooping its own cache. However, there exists + * CPU models in which having conflicting memory types still leads to + * unpredictable behavior, machine check errors, or hangs. Clear this + * feature to prevent its use on machines with known erratas. + */ +static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) +{ + switch (c->x86_model) { + case INTEL_FAM6_CORE_YONAH: + case INTEL_FAM6_CORE2_MEROM: + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_G: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_SANDYBRIDGE: + setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); + } +} + static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) @@ -239,9 +265,9 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ if (c->x86 == 6) { switch (c->x86_model) { - case 0x27: /* Penwell */ - case 0x35: /* Cloverview */ - case 0x4a: /* Merrifield */ + case INTEL_FAM6_ATOM_SALTWELL_MID: + case INTEL_FAM6_ATOM_SALTWELL_TABLET: + case INTEL_FAM6_ATOM_SILVERMONT_MID: set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); break; default: @@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_mpx_erratum(c); + check_memory_type_self_snoop_errata(c); /* * Get the number of SMT siblings early from the extended topology diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 6dd78d8235e4..2f163e6646b6 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -34,13 +34,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + for (m = match; + m->vendor | m->family | m->model | m->steppings | m->feature; + m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) continue; if (m->model != X86_MODEL_ANY && c->x86_model != m->model) continue; + if (m->steppings != X86_STEPPING_ANY && + !(BIT(c->x86_stepping) & m->steppings)) + continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; return m; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 785050af85e5..f1c845416790 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -244,10 +244,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu) } /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid) + if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } @@ -1159,9 +1159,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1194,8 +1197,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1239,16 +1243,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1256,7 +1256,7 @@ recurse: if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1341,8 +1341,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1353,9 +1351,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); @@ -1364,8 +1366,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1375,13 +1381,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 282916f3b8d8..db1a8a1612f9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -517,6 +517,13 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool whole_page(struct mce *m) +{ + if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV)) + return true; + return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT; +} + bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) @@ -585,7 +592,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) - set_mce_nospec(pfn); + set_mce_nospec(pfn, whole_page(mce)); } return NOTIFY_OK; @@ -797,8 +804,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); + m->bank = i; if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { - m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; @@ -1086,7 +1093,7 @@ static int do_memory_failure(struct mce *m) if (ret) pr_err("Memory error not recovered"); else - set_mce_nospec(m->addr >> PAGE_SHIFT); + set_mce_nospec(m->addr >> PAGE_SHIFT, whole_page(m)); return ret; } diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index 7c8958dee103..6c9b91b773ef 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -328,6 +328,7 @@ static const struct file_operations mce_chrdev_ops = { .write = mce_chrdev_write, .poll = mce_chrdev_poll, .unlocked_ioctl = mce_chrdev_ioctl, + .compat_ioctl = compat_ptr_ioctl, .llseek = no_llseek, }; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 5d108f70f315..799b74b1582c 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -511,7 +511,7 @@ static void do_inject(void) */ if (inj_type == DFR_INT_INJ) { i_mce.status |= MCI_STATUS_DEFERRED; - i_mce.status |= (i_mce.status & ~MCI_STATUS_UC); + i_mce.status &= ~MCI_STATUS_UC; } /* diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index e43eb6732630..693c8cfac75d 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -489,17 +489,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 6e2becf547c5..bc441d68d060 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -188,7 +188,7 @@ static void therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_warn("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 7df29f08871b..920aa2f0b52b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -220,8 +220,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); @@ -256,6 +256,16 @@ static void __init ms_hyperv_init_platform(void) cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); } + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { @@ -266,9 +276,9 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); - lapic_timer_frequency = hv_lapic_frequency; + lapic_timer_period = hv_lapic_frequency; pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + lapic_timer_period); } register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 03eb90d00af0..d8cc5223b7ce 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -578,6 +578,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; @@ -618,7 +620,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); list_del(&d->list); - if (is_mbm_enabled()) + if (r->mon_capable && is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { /* diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index efbd54cc4e69..055c8613b531 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } md.priv = of->kn->priv; resid = md.u.rid; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index e49b77283924..3dd13f3a8b23 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -57,6 +57,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) } DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); /** * struct mon_evt - Entry in the event list of a resource @@ -600,5 +601,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 397206f23d14..773124b0e18a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -514,7 +514,7 @@ void mbm_handle_overflow(struct work_struct *work) mutex_lock(&rdtgroup_mutex); - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); @@ -543,7 +543,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms) unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) return; cpu = cpumask_any(&dom->cpu_mask); dom->mbm_work_cpu = cpu; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2f4824793798..81200c253002 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); - rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; - rdt_last_cmd_puts("Directory was removed\n"); goto unlock; } @@ -1029,6 +1027,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; + _d_cdp = NULL; ret = -EINVAL; } @@ -1743,9 +1742,6 @@ static int set_cache_qos_cfg(int level, bool enable) struct rdt_domain *d; int cpu; - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - if (level == RDT_RESOURCE_L3) update = l3_qos_cfg_update; else if (level == RDT_RESOURCE_L2) @@ -1753,6 +1749,9 @@ static int set_cache_qos_cfg(int level, bool enable) else return -EINVAL; + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ @@ -1771,6 +1770,19 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * Enable or disable the MBA software controller * which helps user specify bandwidth in MBps. @@ -1972,7 +1984,7 @@ static int rdt_get_tree(struct fs_context *fc) if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, - NULL, "mon_groups", + &rdtgroup_default, "mon_groups", &kn_mongrp); if (ret < 0) goto out_info; @@ -2208,7 +2220,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { free_rmid(sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); - kfree(sentry); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + kfree(sentry); } } @@ -2246,7 +2262,11 @@ static void rmdir_all_sub(void) kernfs_remove(rdtgrp->kn); list_del(&rdtgrp->rdtgroup_list); - kfree(rdtgrp); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2449,7 +2469,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, /* * Create the mon_data directory first. */ - ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); if (ret) return ret; @@ -2648,11 +2668,9 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret; - prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); - rdt_last_cmd_clear(); + prdtgrp = rdtgroup_kn_lock_live(parent_kn); if (!prdtgrp) { ret = -ENODEV; - rdt_last_cmd_puts("Directory was removed\n"); goto out_unlock; } @@ -2723,7 +2741,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn); /* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0; @@ -2734,7 +2752,7 @@ out_destroy: out_free_rgrp: kfree(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2772,7 +2790,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, */ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2815,7 +2833,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); goto out_del_list; @@ -2831,7 +2849,7 @@ out_id_free: out_common_fail: mkdir_rdt_prepare_clean(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2957,13 +2975,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); + rdtgroup_ctrl_remove(kn, rdtgrp); + /* * Free all the child monitor group rmids. */ free_all_child_rdtgrp(rdtgrp); - rdtgroup_ctrl_remove(kn, rdtgrp); - return 0; } @@ -2990,7 +3008,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { ret = rdtgroup_ctrl_remove(kn, rdtgrp); diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 3e20d322bc98..032509adf9de 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -115,11 +115,12 @@ void __init tsx_init(void) tsx_disable(); /* - * tsx_disable() will change the state of the - * RTM CPUID bit. Clear it here since it is now - * expected to be not set. + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. */ setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { /* @@ -131,10 +132,10 @@ void __init tsx_init(void) tsx_enable(); /* - * tsx_enable() will change the state of the - * RTM CPUID bit. Force it here since it is now - * expected to be set. + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. */ setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); } } diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 34ab4161fc39..88569536d529 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -158,7 +158,7 @@ static void __init vmware_platform_setup(void) #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ - lapic_timer_frequency = ecx / HZ; + lapic_timer_period = ecx / HZ; pr_info("Host bus clock speed read from hypervisor : %u Hz\n", ecx); #endif diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 576b2e1bfc12..84e2d3ddd0eb 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { -#ifdef CONFIG_X86_32 - struct pt_regs fixed_regs; - - if (!user_mode(regs)) { - crash_fixup_ss_esp(&fixed_regs, regs); - regs = &fixed_regs; - } -#endif crash_save_cpu(regs, cpu); /* diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0b8cedb20d6d..d5c9b13bafdf 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -65,6 +65,9 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = { .ss = __KERNEL_DS, .ds = __USER_DS, .fs = __KERNEL_PERCPU, +#ifndef CONFIG_X86_32_LAZY_GS + .gs = __KERNEL_STACK_CANARY, +#endif .__cr3 = __pa_nodebug(swapper_pg_dir), }; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 753b8cfe8b8a..87b97897a881 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); begin = (unsigned long)__this_cpu_read(cea_exception_stacks); + /* + * Handle the case where stack trace is collected _before_ + * cea_exception_stacks had been initialized. + */ + if (!begin) + return false; + end = begin + sizeof(struct cea_exception_stacks); /* Bail if @stack is outside the exception stack area. */ if (stk < begin || stk >= end) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6c4f01540833..7c9259306d74 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -709,6 +709,12 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3e20, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3ec4, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x8a12, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 649fbc3fcf9f..8ba0e53dec75 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -113,6 +113,12 @@ static void __kernel_fpu_begin(void) } } __cpu_invalidate_fpregs_state(); + + if (boot_cpu_has(X86_FEATURE_XMM)) + ldmxcsr(MXCSR_DEFAULT); + + if (boot_cpu_has(X86_FEATURE_FPU)) + asm volatile ("fninit"); } static void __kernel_fpu_end(void) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0071b794ed19..400a05e1c1c5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + fpregs_deactivate(fpu); fpregs_unlock(); } @@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } if (!ret) fpregs_mark_activate(); + else + fpregs_deactivate(fpu); fpregs_unlock(); err_out: diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3c36dd1784db..085afecd878c 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -959,18 +959,31 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) return true; } -/* - * This is similar to user_regset_copyout(), but will not add offset to - * the source data pointer or increment pos, count, kbuf, and ubuf. - */ -static inline void -__copy_xstate_to_kernel(void *kbuf, const void *data, - unsigned int offset, unsigned int size, unsigned int size_total) +static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count) { - if (offset < size_total) { - unsigned int copy = min(size, size_total - offset); + if (*pos < to) { + unsigned size = to - *pos; + + if (size > *count) + size = *count; + memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size); + *kbuf += size; + *pos += size; + *count -= size; + } +} - memcpy(kbuf + offset, data, copy); +static void copy_part(unsigned offset, unsigned size, void *from, + void **kbuf, unsigned *pos, unsigned *count) +{ + fill_gap(offset, kbuf, pos, count); + if (size > *count) + size = *count; + if (size) { + memcpy(*kbuf, from, size); + *kbuf += size; + *pos += size; + *count -= size; } } @@ -983,8 +996,9 @@ __copy_xstate_to_kernel(void *kbuf, const void *data, */ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { - unsigned int offset, size; struct xstate_header header; + const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr); + unsigned count = size_total; int i; /* @@ -1000,46 +1014,42 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of header.xfeatures = xsave->header.xfeatures; header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + if (header.xfeatures & XFEATURE_MASK_FP) + copy_part(0, off_mxcsr, + &xsave->i387, &kbuf, &offset_start, &count); + if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)) + copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE, + &xsave->i387.mxcsr, &kbuf, &offset_start, &count); + if (header.xfeatures & XFEATURE_MASK_FP) + copy_part(offsetof(struct fxregs_state, st_space), 128, + &xsave->i387.st_space, &kbuf, &offset_start, &count); + if (header.xfeatures & XFEATURE_MASK_SSE) + copy_part(xstate_offsets[XFEATURE_SSE], 256, + &xsave->i387.xmm_space, &kbuf, &offset_start, &count); + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + copy_part(offsetof(struct fxregs_state, sw_reserved), 48, + xstate_fx_sw_bytes, &kbuf, &offset_start, &count); /* * Copy xregs_state->header: */ - offset = offsetof(struct xregs_state, header); - size = sizeof(header); - - __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); + copy_part(offsetof(struct xregs_state, header), sizeof(header), + &header, &kbuf, &offset_start, &count); - for (i = 0; i < XFEATURE_MAX; i++) { + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { /* * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { void *src = __raw_xsave_addr(xsave, i); - offset = xstate_offsets[i]; - size = xstate_sizes[i]; - - /* The next component has to fit fully into the output buffer: */ - if (offset + size > size_total) - break; - - __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); + copy_part(xstate_offsets[i], xstate_sizes[i], + src, &kbuf, &offset_start, &count); } } - - if (xfeatures_mxcsr_quirk(header.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); - } - - /* - * Fill xsave->i387.sw_reserved value for ptrace frame: - */ - offset = offsetof(struct fxregs_state, sw_reserved); - size = sizeof(xstate_fx_sw_bytes); - - __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); + fill_gap(size_total, &kbuf, &offset_start, &count); return 0; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 76228525acd0..4b73f5937f41 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -310,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs) ip = regs->ip - INT3_INSN_SIZE; -#ifdef CONFIG_X86_64 if (ftrace_location(ip)) { int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); return 1; @@ -322,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs) int3_emulate_call(regs, ftrace_update_func_call); return 1; } -#else - if (ftrace_location(ip) || is_ftrace_caller(ip)) { - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); - return 1; - } -#endif return 0; } diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 2ba914a34b06..073aab525d80 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -9,6 +9,8 @@ #include <asm/export.h> #include <asm/ftrace.h> #include <asm/nospec-branch.h> +#include <asm/frame.h> +#include <asm/asm-offsets.h> # define function_hook __fentry__ EXPORT_SYMBOL(__fentry__) @@ -89,26 +91,38 @@ END(ftrace_caller) ENTRY(ftrace_regs_caller) /* - * i386 does not save SS and ESP when coming from kernel. - * Instead, to get sp, ®s->sp is used (see ptrace.h). - * Unfortunately, that means eflags must be at the same location - * as the current return ip is. We move the return ip into the - * regs->ip location, and move flags into the return ip location. + * We're here from an mcount/fentry CALL, and the stack frame looks like: + * + * <previous context> + * RET-IP + * + * The purpose of this function is to call out in an emulated INT3 + * environment with a stack frame like: + * + * <previous context> + * gap / RET-IP + * gap + * gap + * gap + * pt_regs + * + * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds */ - pushl $__KERNEL_CS - pushl 4(%esp) /* Save the return ip */ - pushl $0 /* Load 0 into orig_ax */ + subl $3*4, %esp # RET-IP + 3 gaps + pushl %ss # ss + pushl %esp # points at ss + addl $5*4, (%esp) # make it point at <previous context> + pushfl # flags + pushl $__KERNEL_CS # cs + pushl 7*4(%esp) # ip <- RET-IP + pushl $0 # orig_eax + pushl %gs pushl %fs pushl %es pushl %ds - pushl %eax - - /* Get flags and place them into the return ip slot */ - pushf - popl %eax - movl %eax, 8*4(%esp) + pushl %eax pushl %ebp pushl %edi pushl %esi @@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller) pushl %ecx pushl %ebx - movl 12*4(%esp), %eax /* Load ip (1st parameter) */ - subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ - movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ - movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ - pushl %esp /* Save pt_regs as 4th parameter */ + ENCODE_FRAME_POINTER + + movl PT_EIP(%esp), %eax # 1st argument: IP + subl $MCOUNT_INSN_SIZE, %eax + movl 21*4(%esp), %edx # 2nd argument: parent ip + movl function_trace_op, %ecx # 3rd argument: ftrace_pos + pushl %esp # 4th argument: pt_regs GLOBAL(ftrace_regs_call) call ftrace_stub - addl $4, %esp /* Skip pt_regs */ + addl $4, %esp # skip 4th argument - /* restore flags */ - push 14*4(%esp) - popf + /* place IP below the new SP */ + movl PT_OLDESP(%esp), %eax + movl PT_EIP(%esp), %ecx + movl %ecx, -4(%eax) - /* Move return ip back to its original location */ - movl 12*4(%esp), %eax - movl %eax, 14*4(%esp) + /* place EAX below that */ + movl PT_EAX(%esp), %ecx + movl %ecx, -8(%eax) popl %ebx popl %ecx @@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call) popl %esi popl %edi popl %ebp - popl %eax - popl %ds - popl %es - popl %fs - popl %gs - /* use lea to not affect flags */ - lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */ + lea -8(%eax), %esp + popl %eax jmp .Lftrace_ret diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 10eb2760ef2c..809d54397dba 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -9,6 +9,7 @@ #include <asm/export.h> #include <asm/nospec-branch.h> #include <asm/unwind_hints.h> +#include <asm/frame.h> .code64 .section .entry.text, "ax" @@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr) leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx movq %rcx, RSP(%rsp) + ENCODE_FRAME_POINTER + /* regs go into 4th parameter */ leaq (%rsp), %rcx diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 30f9cb2c0b55..2e6a0676c1f4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -571,6 +571,16 @@ ENTRY(initial_page_table) # error "Kernel PMDs should be 1, 2 or 3" # endif .align PAGE_SIZE /* needs to be page-sized too */ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * PTI needs another page so sync_initial_pagetable() works correctly + * and does not scribble over the data which is placed behind the + * actual initial_page_table. See clone_pgd_range(). + */ + .fill 1024, 4, 0 +#endif + #endif .data diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index bcd206c8ac90..0e2d72929a8c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -29,9 +29,7 @@ #ifdef CONFIG_PARAVIRT_XXL #include <asm/asm-offsets.h> #include <asm/paravirt.h> -#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg #else -#define GET_CR2_INTO(reg) movq %cr2, reg #define INTERRUPT_RETURN iretq #endif @@ -323,7 +321,7 @@ early_idt_handler_common: cmpq $14,%rsi /* Page fault? */ jnz 10f - GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ + GET_CR2_INTO(%rdi) /* can clobber %rax if pv */ call early_make_pgtable andl %eax,%eax jz 20f /* All good */ diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 519649ddf100..fe522691ac71 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -207,7 +207,7 @@ spurious_8259A_irq: * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG + printk_deferred(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 87ef69a72c52..7bb4c3cbf4dc 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -318,7 +318,11 @@ void __init idt_setup_apic_and_irq_gates(void) #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { - set_bit(i, system_vectors); + /* + * Don't set the non assigned system vectors in the + * system_vectors bitmap. Otherwise they show up in + * /proc/interrupts. + */ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); set_intr_gate(i, entry); } diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 4c407833faca..fa188de3f424 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -10,8 +10,6 @@ extern struct boot_params boot_params; static enum efi_secureboot_mode get_sb_mode(void) { - efi_char16_t efi_SecureBoot_name[] = L"SecureBoot"; - efi_char16_t efi_SetupMode_name[] = L"SecureBoot"; efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; efi_status_t status; unsigned long size; @@ -25,7 +23,7 @@ static enum efi_secureboot_mode get_sb_mode(void) } /* Get variable contents into buffer */ - status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid, + status = efi.get_variable(L"SecureBoot", &efi_variable_guid, NULL, &size, &secboot); if (status == EFI_NOT_FOUND) { pr_info("ima: secureboot mode disabled\n"); @@ -38,7 +36,7 @@ static enum efi_secureboot_mode get_sb_mode(void) } size = sizeof(setupmode); - status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid, + status = efi.get_variable(L"SetupMode", &efi_variable_guid, NULL, &size, &setupmode); if (status != EFI_SUCCESS) /* ignore unknown SetupMode */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6bf6517a05bb..5bfa35f3dbac 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -52,7 +52,7 @@ static int map_irq_stack(unsigned int cpu) pages[i] = pfn_to_page(pa >> PAGE_SHIFT); } - va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL); if (!va) return -ENOMEM; diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 1b2ee55a2dfb..ba95bc70460d 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now) static void __init jailhouse_timer_init(void) { - lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); + lapic_timer_period = setup_data.apic_khz * (1000 / HZ); } static unsigned long jailhouse_get_tsc(void) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 6690c5652aeb..23297ea64f5f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) #ifdef CONFIG_X86_32 switch (regno) { - case GDB_SS: - if (!user_mode(regs)) - *(unsigned long *)mem = __KERNEL_DS; - break; - case GDB_SP: - if (!user_mode(regs)) - *(unsigned long *)mem = kernel_stack_pointer(regs); - break; case GDB_GS: case GDB_FS: *(unsigned long *)mem = 0xFFFF; diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 2b949f4fd4d8..7d3a2e2daf01 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -5,15 +5,10 @@ /* Kprobes and Optprobes common header */ #include <asm/asm.h> - -#ifdef CONFIG_FRAME_POINTER -# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \ - " mov %" _ASM_SP ", %" _ASM_BP "\n" -#else -# define SAVE_RBP_STRING " push %" _ASM_BP "\n" -#endif +#include <asm/frame.h> #ifdef CONFIG_X86_64 + #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax. */ \ " subq $24, %rsp\n" \ @@ -27,11 +22,13 @@ " pushq %r10\n" \ " pushq %r11\n" \ " pushq %rbx\n" \ - SAVE_RBP_STRING \ + " pushq %rbp\n" \ " pushq %r12\n" \ " pushq %r13\n" \ " pushq %r14\n" \ - " pushq %r15\n" + " pushq %r15\n" \ + ENCODE_FRAME_POINTER + #define RESTORE_REGS_STRING \ " popq %r15\n" \ " popq %r14\n" \ @@ -51,19 +48,22 @@ /* Skip orig_ax, ip, cs */ \ " addq $24, %rsp\n" #else + #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ + " subl $4*4, %esp\n" \ " pushl %fs\n" \ " pushl %es\n" \ " pushl %ds\n" \ " pushl %eax\n" \ - SAVE_RBP_STRING \ + " pushl %ebp\n" \ " pushl %edi\n" \ " pushl %esi\n" \ " pushl %edx\n" \ " pushl %ecx\n" \ - " pushl %ebx\n" + " pushl %ebx\n" \ + ENCODE_FRAME_POINTER + #define RESTORE_REGS_STRING \ " popl %ebx\n" \ " popl %ecx\n" \ @@ -72,8 +72,8 @@ " popl %edi\n" \ " popl %ebp\n" \ " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ - " addl $24, %esp\n" + /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\ + " addl $7*4, %esp\n" #endif /* Ensure if the instruction can be boostable */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6afd8061dbae..27159a40dc93 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -56,7 +56,7 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) +#define stack_addr(regs) ((unsigned long *)regs->sp) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -718,29 +718,27 @@ asm( ".global kretprobe_trampoline\n" ".type kretprobe_trampoline, @function\n" "kretprobe_trampoline:\n" -#ifdef CONFIG_X86_64 /* We don't bother saving the ss register */ +#ifdef CONFIG_X86_64 " pushq %rsp\n" " pushfq\n" SAVE_REGS_STRING " movq %rsp, %rdi\n" " call trampoline_handler\n" /* Replace saved sp with true return address. */ - " movq %rax, 152(%rsp)\n" + " movq %rax, 19*8(%rsp)\n" RESTORE_REGS_STRING " popfq\n" #else - " pushf\n" + " pushl %esp\n" + " pushfl\n" SAVE_REGS_STRING " movl %esp, %eax\n" " call trampoline_handler\n" - /* Move flags to cs */ - " movl 56(%esp), %edx\n" - " movl %edx, 52(%esp)\n" - /* Replace saved flags with true return address. */ - " movl %eax, 56(%esp)\n" + /* Replace saved sp with true return address. */ + " movl %eax, 15*4(%esp)\n" RESTORE_REGS_STRING - " popf\n" + " popfl\n" #endif " ret\n" ".size kretprobe_trampoline, .-kretprobe_trampoline\n" @@ -748,16 +746,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -767,30 +760,23 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. */ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ -#ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; - /* On x86-64, we use pt_regs->sp for return address holder. */ - frame_pointer = ®s->sp; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); +#ifdef CONFIG_X86_32 + regs->cs |= get_kernel_rpl(); regs->gs = 0; - /* On x86-32, we use pt_regs->flags for return address holder. */ - frame_pointer = ®s->flags; #endif + /* We use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -855,7 +841,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -871,8 +857,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7c361a24c6df..282b4eb67e30 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -102,14 +102,15 @@ asm ( "optprobe_template_call:\n" ASM_NOP5 /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" + " movq 18*8(%rsp), %rdx\n" + " movq %rdx, 19*8(%rsp)\n" RESTORE_REGS_STRING /* Skip flags entry */ " addq $8, %rsp\n" " popfq\n" #else /* CONFIG_X86_32 */ - " pushf\n" + " pushl %esp\n" + " pushfl\n" SAVE_REGS_STRING " movl %esp, %edx\n" ".global optprobe_template_val\n" @@ -118,9 +119,13 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 + /* Move flags into esp */ + " movl 14*4(%esp), %edx\n" + " movl %edx, 15*4(%esp)\n" RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" + /* Skip flags entry */ + " addl $4, %esp\n" + " popfl\n" #endif ".global optprobe_template_end\n" "optprobe_template_end:\n" @@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) } else { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ -#ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); +#ifdef CONFIG_X86_32 + regs->cs |= get_kernel_rpl(); regs->gs = 0; #endif regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 320b70acb211..92ef0ec8c87c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); dotraplinkage void -do_async_page_fault(struct pt_regs *regs, unsigned long error_code) +do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { enum ctx_state prev_state; switch (kvm_read_and_reset_pf_reason()) { default: - do_page_fault(regs, error_code); + do_page_fault(regs, error_code, address); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); + kvm_async_pf_task_wait((u32)address, !user_mode(regs)); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); - kvm_async_pf_task_wake((u32)read_cr2()); + kvm_async_pf_task_wake((u32)address); rcu_irq_exit(); break; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4df7705022b9..d7f40ad34432 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void) } fs_initcall(nmi_warning_debugfs); -static void nmi_max_handler(struct irq_work *w) +static void nmi_check_duration(struct nmiaction *action, u64 duration) { - struct nmiaction *a = container_of(w, struct nmiaction, irq_work); + u64 whole_msecs = READ_ONCE(action->max_duration); int remainder_ns, decimal_msecs; - u64 whole_msecs = READ_ONCE(a->max_duration); + + if (duration < nmi_longest_ns || duration < action->max_duration) + return; + + action->max_duration = duration; remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; printk_ratelimited(KERN_INFO "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", - a->handler, whole_msecs, decimal_msecs); + action->handler, whole_msecs, decimal_msecs); } static int nmi_handle(unsigned int type, struct pt_regs *regs) @@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs) delta = sched_clock() - delta; trace_nmi_handler(a->handler, (int)delta, thishandled); - if (delta < nmi_longest_ns || delta < a->max_duration) - continue; - - a->max_duration = delta; - irq_work_queue(&a->irq_work); + nmi_check_duration(a, delta); } rcu_read_unlock(); @@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) if (!action->handler) return -EINVAL; - init_irq_work(&action->irq_work, nmi_max_handler); - raw_spin_lock_irqsave(&desc->lock, flags); /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 06f6bb48d018..f8ea3a00bdd9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = { .mmu.exit_mmap = paravirt_nop, #ifdef CONFIG_PARAVIRT_XXL - .mmu.read_cr2 = native_read_cr2, + .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2), .mmu.write_cr2 = native_write_cr2, .mmu.read_cr3 = __native_read_cr3, .mmu.write_cr3 = native_write_cr3, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 75fea0d48c0e..a04e43ff4b5e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -428,28 +428,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, lockdep_assert_irqs_disabled(); - /* - * If TIF_SSBD is different, select the proper mitigation - * method. Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2399e910d109..b8ceec4974fe 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; - unsigned long sp; - unsigned short ss, gs; + unsigned short gs; - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss; + if (user_mode(regs)) gs = get_user_gs(regs); - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); + else savesegment(gs, gs); - } show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", - regs->si, regs->di, regs->bp, sp); + regs->si, regs->di, regs->bp, regs->sp); printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags); if (mode != SHOW_REGS_ALL) return; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 250e4c4ac6d9..d02874cddda3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -326,7 +326,7 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task, */ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; - if (unlikely(idx >= ldt->nr_entries)) + if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 00fccf952d9b..b47dfeb3a02a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -155,35 +155,6 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -/* - * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode - * when it traps. The previous stack will be directly underneath the saved - * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. - * - * Now, if the stack is empty, '®s->sp' is out of range. In this - * case we try to take the previous stack. To always return a non-null - * stack pointer we fall back to regs as stack if no previous stack - * exists. - * - * This is valid only for kernel mode traps. - */ -unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ - unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); - unsigned long sp = (unsigned long)®s->sp; - u32 *prev_esp; - - if (context == (sp & ~(THREAD_SIZE - 1))) - return sp; - - prev_esp = (u32 *)(context); - if (*prev_esp) - return (unsigned long)*prev_esp; - - return (unsigned long)regs; -} -EXPORT_SYMBOL_GPL(kernel_stack_pointer); - static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 09d6bded3c1e..3f677832fc12 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 362dd8953f48..5fe11d5d29c1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -210,17 +210,11 @@ static void notrace start_secondary(void *unused) * before cpu_init(), SMP booting is too fragile that we want to * limit the things done here to the most necessary things. */ - if (boot_cpu_has(X86_FEATURE_PCID)) - __write_cr4(__read_cr4() | X86_CR4_PCIDE); + cr4_init(); #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); @@ -263,6 +257,14 @@ static void notrace start_secondary(void *unused) wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + + /* + * Prevent tail call to cpu_startup_entry() because the stack protector + * guard has been changed a couple of function calls up, in + * boot_init_stack_canary() and must not be checked before tail calling + * another function. + */ + prevent_tail_call_optimization(); } /** diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 2d6898c2cb64..6d83b4b857e6 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -58,7 +58,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, * or a page fault), which can make frame pointers * unreliable. */ - if (IS_ENABLED(CONFIG_FRAME_POINTER)) return -EINVAL; } @@ -81,10 +80,6 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (unwind_error(&state)) return -EINVAL; - /* Success path for non-user tasks, i.e. kthreads and idle tasks */ - if (!(task->flags & (PF_KTHREAD | PF_IDLE))) - return -EINVAL; - return 0; } diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 01f0e2263b86..298fc1edd9c9 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -90,11 +90,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 0e14f6c0d35e..befb70359551 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,10 +25,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -37,8 +33,7 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = - (unsigned long *)kernel_stack_pointer(regs); + unsigned long *sp = (unsigned long *)regs->sp; /* * Return address is either directly at stack pointer * or above a saved flags. Eflags has bits 22-31 zero, diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8b6d03e55d2f..648bfb2b021d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message, #ifdef CONFIG_X86_64 /* Runs on IST stack */ -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2) { static const char str[] = "double fault"; struct task_struct *tsk = current; -#ifdef CONFIG_VMAP_STACK - unsigned long cr2; -#endif #ifdef CONFIG_X86_ESPFIX64 extern unsigned char native_irq_return_iret[]; @@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * stack even if the actual trigger for the double fault was * something else. */ - cr2 = read_cr2(); if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0b29e58f288e..f30730621c8a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -665,6 +665,16 @@ unsigned long native_calibrate_tsc(void) if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * The local APIC appears to be fed by the core crystal clock + * (which sounds entirely sensible). We can set the global + * lapic_timer_period here to avoid having to calibrate the APIC + * timer later. + */ + lapic_timer_period = crystal_khz * 1000 / HZ; +#endif + return crystal_khz * ebx_numerator / eax_denominator; } @@ -1488,6 +1498,9 @@ void __init tsc_init(void) return; } + if (tsc_clocksource_reliable || no_tsc_watchdog) + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 3d0e9aeea7c8..6f56071aa909 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -15,18 +15,46 @@ #include <asm/param.h> #include <asm/tsc.h> -#define MAX_NUM_FREQS 9 +#define MAX_NUM_FREQS 16 /* 4 bits to select the frequency */ + +/* + * The frequency numbers in the SDM are e.g. 83.3 MHz, which does not contain a + * lot of accuracy which leads to clock drift. As far as we know Bay Trail SoCs + * use a 25 MHz crystal and Cherry Trail uses a 19.2 MHz crystal, the crystal + * is the source clk for a root PLL which outputs 1600 and 100 MHz. It is + * unclear if the root PLL outputs are used directly by the CPU clock PLL or + * if there is another PLL in between. + * This does not matter though, we can model the chain of PLLs as a single PLL + * with a quotient equal to the quotients of all PLLs in the chain multiplied. + * So we can create a simplified model of the CPU clock setup using a reference + * clock of 100 MHz plus a quotient which gets us as close to the frequency + * from the SDM as possible. + * For the 83.3 MHz example from above this would give us 100 MHz * 5 / 6 = + * 83 and 1/3 MHz, which matches exactly what has been measured on actual hw. + */ +#define TSC_REFERENCE_KHZ 100000 + +struct muldiv { + u32 multiplier; + u32 divider; +}; /* * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. * Unfortunately some Intel Atom SoCs aren't quite compliant to this, * so we need manually differentiate SoC families. This is what the - * field msr_plat does. + * field use_msr_plat does. */ struct freq_desc { - u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ + bool use_msr_plat; + struct muldiv muldiv[MAX_NUM_FREQS]; + /* + * Some CPU frequencies in the SDM do not map to known PLL freqs, in + * that case the muldiv array is empty and the freqs array is used. + */ u32 freqs[MAX_NUM_FREQS]; + u32 mask; }; /* @@ -35,27 +63,74 @@ struct freq_desc { * by MSR based on SDM. */ static const struct freq_desc freq_desc_pnw = { - 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } + .use_msr_plat = false, + .freqs = { 0, 0, 0, 0, 0, 99840, 0, 83200 }, + .mask = 0x07, }; static const struct freq_desc freq_desc_clv = { - 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } + .use_msr_plat = false, + .freqs = { 0, 133200, 0, 0, 0, 99840, 0, 83200 }, + .mask = 0x07, }; +/* + * Bay Trail SDM MSR_FSB_FREQ frequencies simplified PLL model: + * 000: 100 * 5 / 6 = 83.3333 MHz + * 001: 100 * 1 / 1 = 100.0000 MHz + * 010: 100 * 4 / 3 = 133.3333 MHz + * 011: 100 * 7 / 6 = 116.6667 MHz + * 100: 100 * 4 / 5 = 80.0000 MHz + */ static const struct freq_desc freq_desc_byt = { - 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } + .use_msr_plat = true, + .muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 }, + { 4, 5 } }, + .mask = 0x07, }; +/* + * Cherry Trail SDM MSR_FSB_FREQ frequencies simplified PLL model: + * 0000: 100 * 5 / 6 = 83.3333 MHz + * 0001: 100 * 1 / 1 = 100.0000 MHz + * 0010: 100 * 4 / 3 = 133.3333 MHz + * 0011: 100 * 7 / 6 = 116.6667 MHz + * 0100: 100 * 4 / 5 = 80.0000 MHz + * 0101: 100 * 14 / 15 = 93.3333 MHz + * 0110: 100 * 9 / 10 = 90.0000 MHz + * 0111: 100 * 8 / 9 = 88.8889 MHz + * 1000: 100 * 7 / 8 = 87.5000 MHz + */ static const struct freq_desc freq_desc_cht = { - 1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 } + .use_msr_plat = true, + .muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 7, 6 }, + { 4, 5 }, { 14, 15 }, { 9, 10 }, { 8, 9 }, + { 7, 8 } }, + .mask = 0x0f, }; +/* + * Merriefield SDM MSR_FSB_FREQ frequencies simplified PLL model: + * 0001: 100 * 1 / 1 = 100.0000 MHz + * 0010: 100 * 4 / 3 = 133.3333 MHz + */ static const struct freq_desc freq_desc_tng = { - 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } + .use_msr_plat = true, + .muldiv = { { 0, 0 }, { 1, 1 }, { 4, 3 } }, + .mask = 0x07, }; +/* + * Moorefield SDM MSR_FSB_FREQ frequencies simplified PLL model: + * 0000: 100 * 5 / 6 = 83.3333 MHz + * 0001: 100 * 1 / 1 = 100.0000 MHz + * 0010: 100 * 4 / 3 = 133.3333 MHz + * 0011: 100 * 1 / 1 = 100.0000 MHz + */ static const struct freq_desc freq_desc_ann = { - 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } + .use_msr_plat = true, + .muldiv = { { 5, 6 }, { 1, 1 }, { 4, 3 }, { 1, 1 } }, + .mask = 0x0f, }; static const struct x86_cpu_id tsc_msr_cpu_ids[] = { @@ -71,22 +146,24 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = { /* * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Set global "lapic_timer_period" to bus_clock_cycles/jiffy * Return processor base frequency in KHz, or 0 on failure. */ unsigned long cpu_khz_from_msr(void) { - u32 lo, hi, ratio, freq; + u32 lo, hi, ratio, freq, tscref; const struct freq_desc *freq_desc; const struct x86_cpu_id *id; + const struct muldiv *md; unsigned long res; + int index; id = x86_match_cpu(tsc_msr_cpu_ids); if (!id) return 0; freq_desc = (struct freq_desc *)id->driver_data; - if (freq_desc->msr_plat) { + if (freq_desc->use_msr_plat) { rdmsr(MSR_PLATFORM_INFO, lo, hi); ratio = (lo >> 8) & 0xff; } else { @@ -96,15 +173,31 @@ unsigned long cpu_khz_from_msr(void) /* Get FSB FREQ ID */ rdmsr(MSR_FSB_FREQ, lo, hi); + index = lo & freq_desc->mask; + md = &freq_desc->muldiv[index]; - /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */ - freq = freq_desc->freqs[lo & 0x7]; + /* + * Note this also catches cases where the index points to an unpopulated + * part of muldiv, in that case the else will set freq and res to 0. + */ + if (md->divider) { + tscref = TSC_REFERENCE_KHZ * md->multiplier; + freq = DIV_ROUND_CLOSEST(tscref, md->divider); + /* + * Multiplying by ratio before the division has better + * accuracy than just calculating freq * ratio. + */ + res = DIV_ROUND_CLOSEST(tscref * ratio, md->divider); + } else { + freq = freq_desc->freqs[index]; + res = freq * ratio; + } - /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ - res = freq * ratio; + if (freq == 0) + pr_err("Error MSR_FSB_FREQ index %d is unknown\n", index); #ifdef CONFIG_X86_LOCAL_APIC - lapic_timer_frequency = (freq * 1000) / HZ; + lapic_timer_period = (freq * 1000) / HZ; #endif /* diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 6106760de716..a224b5ab103f 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state) } } -static size_t regs_size(struct pt_regs *regs) -{ - /* x86_32 regs from kernel mode are two words shorter: */ - if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) - return sizeof(*regs) - 2*sizeof(long); - - return sizeof(*regs); -} - static bool in_entry_code(unsigned long ip) { char *addr = (char *)ip; @@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) } #endif -#ifdef CONFIG_X86_32 -#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) -#else -#define KERNEL_REGS_SIZE (sizeof(struct pt_regs)) -#endif - static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) { @@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state, size_t len; if (state->regs) - prev_frame_end = (void *)state->regs + regs_size(state->regs); + prev_frame_end = (void *)state->regs + sizeof(*state->regs); else prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; @@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state, regs = decode_frame_pointer(next_bp); if (regs) { frame = (unsigned long *)regs; - len = KERNEL_REGS_SIZE; + len = sizeof(*regs); state->got_irq = true; } else { frame = next_bp; @@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state, frame < prev_frame_end) return false; - /* - * On 32-bit with user mode regs, make sure the last two regs are safe - * to access: - */ - if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) && - !on_stack(info, frame, len + 2*sizeof(long))) - return false; - /* Move state to the next frame: */ if (regs) { state->regs = regs; @@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, * Pretend that the frame is complete and that BP points to it, but save * the real BP so that we can use it when looking for the next frame. */ - if (regs && regs->ip == 0 && - (unsigned long *)kernel_stack_pointer(regs) >= first_frame) { + if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) { state->next_bp = bp; - bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; + bp = ((unsigned long *)regs->sp) - 1; } /* Initialize stack info and make sure the frame data is accessible: */ diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 72b997eaa1fc..187a86e0e753 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -142,9 +142,6 @@ static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; - if (!orc_init) - return NULL; - if (ip == 0) return &null_orc_entry; @@ -314,12 +311,19 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { + struct task_struct *task = state->task; + if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; + if (task != current && state->sp == task->thread.sp) { + struct inactive_task_frame *frame = (void *)task->thread.sp; + return &frame->ret_addr; + } + if (state->sp) return (unsigned long *)state->sp - 1; @@ -378,9 +382,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr return true; } +/* + * If state->regs is non-NULL, and points to a full pt_regs, just get the reg + * value from state->regs. + * + * Otherwise, if state->regs just points to IRET regs, and the previous frame + * had full regs, it's safe to get the value from the previous regs. This can + * happen when early/late IRQ entry code gets interrupted by an NMI. + */ +static bool get_reg(struct unwind_state *state, unsigned int reg_off, + unsigned long *val) +{ + unsigned int reg = reg_off/8; + + if (!state->regs) + return false; + + if (state->full_regs) { + *val = ((unsigned long *)state->regs)[reg]; + return true; + } + + if (state->prev_regs) { + *val = ((unsigned long *)state->prev_regs)[reg]; + return true; + } + + return false; +} + bool unwind_next_frame(struct unwind_state *state) { - unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp; + unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; bool indirect = false; @@ -398,8 +431,11 @@ bool unwind_next_frame(struct unwind_state *state) /* * Find the orc_entry associated with the text address. * - * Decrement call return addresses by one so they work for sibling - * calls and calls to noreturn functions. + * For a call frame (as opposed to a signal frame), state->ip points to + * the instruction after the call. That instruction's stack layout + * could be different from the call instruction's layout, for example + * if the call was to a noreturn function. So get the ORC data for the + * call instruction itself. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); if (!orc) { @@ -442,39 +478,35 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_REG_R10: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) { orc_warn("missing regs for base reg R10 at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->r10; break; case ORC_REG_R13: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) { orc_warn("missing regs for base reg R13 at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->r13; break; case ORC_REG_DI: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) { orc_warn("missing regs for base reg DI at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->di; break; case ORC_REG_DX: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) { orc_warn("missing regs for base reg DX at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->dx; break; default: @@ -501,6 +533,7 @@ bool unwind_next_frame(struct unwind_state *state) state->sp = sp; state->regs = NULL; + state->prev_regs = NULL; state->signal = false; break; @@ -512,6 +545,7 @@ bool unwind_next_frame(struct unwind_state *state) } state->regs = (struct pt_regs *)sp; + state->prev_regs = NULL; state->full_regs = true; state->signal = true; break; @@ -523,6 +557,8 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } + if (state->full_regs) + state->prev_regs = state->regs; state->regs = (void *)sp - IRET_FRAME_OFFSET; state->full_regs = false; state->signal = true; @@ -531,14 +567,14 @@ bool unwind_next_frame(struct unwind_state *state) default: orc_warn("unknown .orc_unwind entry type %d for ip %pB\n", orc->type, (void *)orig_ip); - break; + goto err; } /* Find BP: */ switch (orc->bp_reg) { case ORC_REG_UNDEFINED: - if (state->regs && state->full_regs) - state->bp = state->regs->bp; + if (get_reg(state, offsetof(struct pt_regs, bp), &tmp)) + state->bp = tmp; break; case ORC_REG_PREV_SP: @@ -585,20 +621,23 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, memset(state, 0, sizeof(*state)); state->task = task; + if (!orc_init) + goto err; + /* * Refuse to unwind the stack of a task while it's executing on another * CPU. This check is racy, but that's ok: the unwinder has other * checks to prevent it from going off the rails. */ if (task_on_another_cpu(task)) - goto done; + goto err; if (regs) { if (user_mode(regs)) - goto done; + goto the_end; state->ip = regs->ip; - state->sp = kernel_stack_pointer(regs); + state->sp = regs->sp; state->bp = regs->bp; state->regs = regs; state->full_regs = true; @@ -617,6 +656,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = task->thread.sp; state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); + state->signal = (void *)state->ip == ret_from_fork; } if (get_stack_info((unsigned long *)state->sp, state->task, @@ -628,6 +668,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, * generate some kind of backtrace if this happens. */ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); + state->error = true; if (get_stack_info(next_page, state->task, &state->stack_info, &state->stack_mask)) return; @@ -648,13 +689,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Otherwise, skip ahead to the user-specified starting frame: */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->sp <= (unsigned long)first_frame)) + state->sp < (unsigned long)first_frame)) unwind_next_frame(state); return; -done: +err: + state->error = true; +the_end: state->stack_info.type = STACK_TYPE_UNKNOWN; - return; } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4d1517022a14..96d8025ea1b0 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -36,13 +36,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif +jiffies = jiffies_64; + #if defined(CONFIG_X86_64) /* * On 64-bit, align RODATA to 2MB so we retain large page mappings for @@ -362,6 +362,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) + . = ALIGN(PAGE_SIZE); *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 4992e7c99588..e8e7d053c7b4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -333,6 +333,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; unsigned f_la57 = 0; + unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0; /* cpuid 1.edx */ const u32 kvm_cpuid_1_edx_x86_features = @@ -399,7 +400,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | + F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); @@ -415,7 +416,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, r = -E2BIG; - if (*nent >= maxnent) + if (WARN_ON(*nent >= maxnent)) goto out; do_cpuid_1_ent(entry, function, index); @@ -494,6 +495,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* Set LA57 based on hardware capability. */ entry->ecx |= f_la57; entry->ecx |= f_umip; + entry->ecx |= f_pku; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); @@ -733,6 +735,9 @@ out: static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, u32 idx, int *nent, int maxnent, unsigned int type) { + if (*nent >= maxnent) + return -E2BIG; + if (type == KVM_GET_EMULATED_CPUID) return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2fe9912aeed7..5560ac478135 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -22,6 +22,7 @@ #include "kvm_cache_regs.h" #include <asm/kvm_emulate.h> #include <linux/stringify.h> +#include <asm/fpu/api.h> #include <asm/debugreg.h> #include <asm/nospec-branch.h> @@ -1057,8 +1058,23 @@ static void fetch_register_operand(struct operand *op) } } +static void emulator_get_fpu(void) +{ + fpregs_lock(); + + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static void emulator_put_fpu(void) +{ + fpregs_unlock(); +} + static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1080,11 +1096,13 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } + emulator_put_fpu(); } static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1106,10 +1124,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } + emulator_put_fpu(); } static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1121,10 +1141,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1136,6 +1158,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1143,7 +1166,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fninit"); + emulator_put_fpu(); return X86EMUL_CONTINUE; } @@ -1154,7 +1179,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstcw %0": "+m"(fcw)); + emulator_put_fpu(); ctxt->dst.val = fcw; @@ -1168,7 +1195,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstsw %0": "+m"(fsw)); + emulator_put_fpu(); ctxt->dst.val = fsw; @@ -4076,8 +4105,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + emulator_put_fpu(); + if (rc != X86EMUL_CONTINUE) return rc; @@ -4120,6 +4153,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4135,6 +4170,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: + emulator_put_fpu(); + return rc; } @@ -5121,6 +5158,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { @@ -5173,16 +5211,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->ad_bytes = def_ad_bytes ^ 6; break; case 0x26: /* ES override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_ES; + break; case 0x2e: /* CS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_CS; + break; case 0x36: /* SS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_SS; + break; case 0x3e: /* DS override */ has_seg_override = true; - ctxt->seg_override = (ctxt->b >> 3) & 3; + ctxt->seg_override = VCPU_SREG_DS; break; case 0x64: /* FS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_FS; + break; case 0x65: /* GS override */ has_seg_override = true; - ctxt->seg_override = ctxt->b & 7; + ctxt->seg_override = VCPU_SREG_GS; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -5266,10 +5316,15 @@ done_prefixes: } break; case Escape: - if (ctxt->modrm > 0xbf) - opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; - else + if (ctxt->modrm > 0xbf) { + size_t size = ARRAY_SIZE(opcode.u.esc->high); + u32 index = array_index_nospec( + ctxt->modrm - 0xc0, size); + + opcode = opcode.u.esc->high[index]; + } else { opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + } break; case InstrDual: if ((ctxt->modrm >> 6) == 3) @@ -5411,7 +5466,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; + emulator_get_fpu(); rc = asm_safe("fwait"); + emulator_put_fpu(); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 72200998687c..2d4c1fe5f380 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -808,11 +808,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - *pdata = hv->hv_crash_param[index]; + *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; return 0; } @@ -851,11 +852,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - hv->hv_crash_param[index] = data; + hv->hv_crash_param[array_index_nospec(index, size)] = data; return 0; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8b38bb4868a6..629a09ca9860 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, switch (addr) { case 0x20: case 0x21: + pic_lock(s); + pic_ioport_write(&s->pics[0], addr, data); + pic_unlock(s); + break; case 0xa0: case 0xa1: pic_lock(s); - pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 1add1bc881e2..9446613625e1 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -36,6 +36,7 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/nospec.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/current.h> @@ -73,13 +74,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; + u64 redir_content = ~0ULL; - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; + if (redir_index < IOAPIC_NUM_PINS) { + u32 index = array_index_nospec( + redir_index, IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[index].bits; + } result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : @@ -297,6 +299,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; + index = array_index_nospec(index, IOAPIC_NUM_PINS); e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; /* Preserve read-only fields */ diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 924b3bd5a7b7..e3ab10d6b12e 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -416,7 +416,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 1cc6c47dc77e..341f58a01de0 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -7,7 +7,7 @@ #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD) #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8d22c79f5333..9483b9da8864 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -59,9 +59,6 @@ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) #define LAPIC_MMIO_LENGTH (1 << 12) /* followed define is not in apicdef.h */ -#define APIC_SHORT_MASK 0xc0000 -#define APIC_DEST_NOSHORT 0x0 -#define APIC_DEST_MASK 0x800 #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 @@ -635,9 +632,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) + if (pv_eoi_get_user(vcpu, &val) < 0) { apic_debug("Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); + return false; + } return val & 0x1; } @@ -1063,11 +1062,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (vcpu->arch.apicv_active) - kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { + if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } @@ -1918,15 +1914,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: + case APIC_LVTERR: { /* TODO: Check vector */ + size_t size; + u32 index; + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + size = ARRAY_SIZE(apic_lvt_mask); + index = array_index_nospec( + (reg - APIC_LVTT) >> 4, size); + val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); - break; + } case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) @@ -2086,7 +2087,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || + if (!kvm_apic_present(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index d6d049ba3045..29980b4cc57b 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -10,8 +10,9 @@ #define KVM_APIC_SIPI 1 #define KVM_APIC_LVT_NUM 6 -#define KVM_APIC_SHORT_MASK 0xc0000 -#define KVM_APIC_DEST_MASK 0x800 +#define APIC_SHORT_MASK 0xc0000 +#define APIC_DEST_NOSHORT 0x0 +#define APIC_DEST_MASK 0x800 #define APIC_BUS_CYCLE_NS 1 #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d21517f49ba1..bc5154c57b4e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -154,11 +154,6 @@ module_param(dbg, bool, 0644); #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ | shadow_x_mask | shadow_nx_mask | shadow_me_mask) -#define ACC_EXEC_MASK 1 -#define ACC_WRITE_MASK PT_WRITABLE_MASK -#define ACC_USER_MASK PT_USER_MASK -#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) - /* The mask for the R/X bits in EPT PTEs */ #define PT64_EPT_READABLE_MASK 0x1ull #define PT64_EPT_EXECUTABLE_MASK 0x4ull @@ -242,6 +237,7 @@ static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_mmio_value; +static u64 __read_mostly shadow_mmio_access_mask; static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask; @@ -284,6 +280,11 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; */ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; +/* + * The number of non-reserved physical address bits irrespective of features + * that repurpose legal bits, e.g. MKTME. + */ +u8 __read_mostly shadow_phys_bits; static void mmu_spte_set(u64 *sptep, u64 spte); static union kvm_mmu_page_role @@ -318,11 +319,15 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, kvm_flush_remote_tlbs_with_range(kvm, &range); } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { + BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; + shadow_mmio_access_mask = access_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); @@ -413,7 +418,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, u64 mask = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; - access &= ACC_WRITE_MASK | ACC_USER_MASK; + access &= shadow_mmio_access_mask; mask |= shadow_mmio_value | access; mask |= gpa | shadow_nonpresent_or_rsvd_mask; mask |= (gpa & shadow_nonpresent_or_rsvd_mask) @@ -440,8 +445,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask; - return (spte & ~mask) & ~PAGE_MASK; + return spte & shadow_mmio_access_mask; } static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, @@ -496,6 +500,25 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); +static u8 kvm_get_shadow_phys_bits(void) +{ + /* + * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected + * in CPU detection code, but the processor treats those reduced bits as + * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at + * the physical address bits reported by CPUID. + */ + if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008)) + return cpuid_eax(0x80000008) & 0xff; + + /* + * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with + * custom CPUID. Proceed with whatever the kernel found since these features + * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008). + */ + return boot_cpu_data.x86_phys_bits; +} + static void kvm_mmu_reset_all_pte_masks(void) { u8 low_phys_bits; @@ -509,6 +532,8 @@ static void kvm_mmu_reset_all_pte_masks(void) shadow_present_mask = 0; shadow_acc_track_mask = 0; + shadow_phys_bits = kvm_get_shadow_phys_bits(); + /* * If the CPU has 46 or less physical address bits, then set an * appropriate mask to guard against L1TF attacks. Otherwise, it is @@ -520,16 +545,15 @@ static void kvm_mmu_reset_all_pte_masks(void) * the most significant bits of legal physical address space. */ shadow_nonpresent_or_rsvd_mask = 0; - low_phys_bits = boot_cpu_data.x86_cache_bits; - if (boot_cpu_data.x86_cache_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) { + low_phys_bits = boot_cpu_data.x86_phys_bits; + if (boot_cpu_has_bug(X86_BUG_L1TF) && + !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= + 52 - shadow_nonpresent_or_rsvd_mask_len)) { + low_phys_bits = boot_cpu_data.x86_cache_bits + - shadow_nonpresent_or_rsvd_mask_len; shadow_nonpresent_or_rsvd_mask = - rsvd_bits(boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len, - boot_cpu_data.x86_cache_bits - 1); - low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; - } else - WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF)); + rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); + } shadow_nonpresent_or_rsvd_lower_gfn_mask = GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); @@ -1220,12 +1244,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); } -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) +static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn) { unsigned long page_size; int i, ret = 0; - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { if (page_size >= KVM_HPAGE_SIZE(i)) @@ -1275,7 +1299,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, if (unlikely(*force_pt_level)) return PT_PAGE_TABLE_LEVEL; - host_level = host_mapping_level(vcpu->kvm, large_gfn); + host_level = host_mapping_level(vcpu, large_gfn); if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; @@ -1747,10 +1771,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, * Emulate arch specific page modification logging for the * nested hypervisor */ -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu) +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa) { if (kvm_x86_ops->write_log_dirty) - return kvm_x86_ops->write_log_dirty(vcpu); + return kvm_x86_ops->write_log_dirty(vcpu, l2_gpa); return 0; } @@ -3326,7 +3350,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * here. */ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - level == PT_PAGE_TABLE_LEVEL && + !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; @@ -3363,7 +3387,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, } if (unlikely(is_noslot_pfn(pfn))) - vcpu_cache_mmio_info(vcpu, gva, gfn, access); + vcpu_cache_mmio_info(vcpu, gva, gfn, + access & shadow_mmio_access_mask); return false; } @@ -3457,7 +3482,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) * - true: let the vcpu to access on the same address again. * - false: let the real page fault path to fix it. */ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, +static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level, u32 error_code) { struct kvm_shadow_walk_iterator iterator; @@ -3477,7 +3502,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) + for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) if (!is_shadow_present_pte(spte) || iterator.level < level) break; @@ -3555,7 +3580,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } while (true); - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, spte, fault_handled); walk_shadow_page_lockless_end(vcpu); @@ -3563,10 +3588,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable); static int make_mmu_pages_available(struct kvm_vcpu *vcpu); -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn, bool prefault) { int r; @@ -3592,16 +3618,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); } - if (fast_page_fault(vcpu, v, level, error_code)) + if (fast_page_fault(vcpu, gpa, level, error_code)) return RET_PF_RETRY; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) return RET_PF_RETRY; - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r)) return r; spin_lock(&vcpu->kvm->mmu_lock); @@ -3611,7 +3637,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault, false); spin_unlock(&vcpu->kvm->mmu_lock); @@ -3912,7 +3938,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { if (exception) @@ -3920,7 +3946,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -4080,13 +4106,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { - gfn_t gfn = gva >> PAGE_SHIFT; + gfn_t gfn = gpa >> PAGE_SHIFT; int r; - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); + /* Note, paging is disabled, ergo gva == gpa. */ + pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; @@ -4098,11 +4125,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - return nonpaging_map(vcpu, gva & PAGE_MASK, + return nonpaging_map(vcpu, gpa & PAGE_MASK, error_code, gfn, prefault); } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; @@ -4111,7 +4139,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) @@ -4128,7 +4157,8 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -4148,12 +4178,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; /* *pfn has correct page already */ if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); + trace_kvm_try_async_get_page(cr2_or_gpa, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); + trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) return true; } @@ -4166,6 +4196,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, { int r = 1; +#ifndef CONFIG_X86_64 + /* A 64-bit CR2 should be impossible on 32-bit KVM. */ + if (WARN_ON_ONCE(fault_address >> 32)) + return -EFAULT; +#endif + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: @@ -4203,7 +4239,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); } -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { kvm_pfn_t pfn; @@ -4510,7 +4546,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | + gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); @@ -4603,7 +4639,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) */ shadow_zero_check = &context->shadow_zero_check; __reset_rsvds_bits_mask(vcpu, shadow_zero_check, - boot_cpu_data.x86_phys_bits, + shadow_phys_bits, context->shadow_root_level, uses_nx, guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), is_pse(vcpu), true); @@ -4640,13 +4676,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(vcpu, shadow_zero_check, - boot_cpu_data.x86_phys_bits, + shadow_phys_bits, context->shadow_root_level, false, boot_cpu_has(X86_FEATURE_GBPAGES), true, true); else __reset_rsvds_bits_mask_ept(shadow_zero_check, - boot_cpu_data.x86_phys_bits, + shadow_phys_bits, false); if (!shadow_me_mask) @@ -4667,7 +4703,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly) { __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, - boot_cpu_data.x86_phys_bits, execonly); + shadow_phys_bits, execonly); } #define BYTE_MASK(access) \ @@ -5462,7 +5498,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) return 0; } -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; @@ -5472,18 +5508,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, /* With shadow page tables, fault_address contains a GVA or nGPA. */ if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; + vcpu->arch.gpa_val = cr2_or_gpa; } r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, + r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); @@ -5503,7 +5539,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, */ if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } @@ -5518,7 +5554,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* @@ -5533,7 +5569,7 @@ emulate: return 1; } - er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); + er = x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); switch (er) { case EMULATE_DONE: @@ -5954,9 +5990,9 @@ restart: * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ - if (sp->role.direct && - !kvm_is_reserved_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && + PageTransCompoundMap(pfn_to_page(pfn))) { pte_list_remove(rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) @@ -6189,14 +6225,13 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) if (new_val != old_val) { struct kvm *kvm; - int idx; spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - idx = srcu_read_lock(&kvm->srcu); + mutex_lock(&kvm->slots_lock); kvm_mmu_zap_all_fast(kvm); - srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->slots_lock); wake_up_process(kvm->arch.nx_lpage_recovery_thread); } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 4610230ddaea..b0fefb3ccc87 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -43,6 +43,11 @@ #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 +#define ACC_EXEC_MASK 1 +#define ACC_WRITE_MASK PT_WRITABLE_MASK +#define ACC_USER_MASK PT_USER_MASK +#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) + static inline u64 rsvd_bits(int s, int e) { if (e < s) @@ -51,7 +56,7 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); @@ -209,7 +214,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index dd30dccd2ad5..c2cabd4c6678 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -246,13 +246,13 @@ TRACE_EVENT( TRACE_EVENT( fast_page_fault, - TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, u64 *sptep, u64 old_spte, bool retry), - TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), + TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), TP_STRUCT__entry( __field(int, vcpu_id) - __field(gva_t, gva) + __field(gpa_t, cr2_or_gpa) __field(u32, error_code) __field(u64 *, sptep) __field(u64, old_spte) @@ -262,7 +262,7 @@ TRACE_EVENT( TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->gva = gva; + __entry->cr2_or_gpa = cr2_or_gpa; __entry->error_code = error_code; __entry->sptep = sptep; __entry->old_spte = old_spte; @@ -270,9 +270,9 @@ TRACE_EVENT( __entry->retry = retry; ), - TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" + TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" " new %llx spurious %d fixed %d", __entry->vcpu_id, - __entry->gva, __print_flags(__entry->error_code, "|", + __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", kvm_mmu_trace_pferr_flags), __entry->sptep, __entry->old_spte, __entry->new_spte, __spte_satisfied(old_spte), __spte_satisfied(new_spte) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 25ce3edd1872..7f0059aa30e1 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) break; case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: *seg = 1; - *unit = msr - MSR_MTRRfix16K_80000; + *unit = array_index_nospec( + msr - MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); break; case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: *seg = 2; - *unit = msr - MSR_MTRRfix4K_C0000; + *unit = array_index_nospec( + msr - MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); break; default: return false; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4a906f7111ce..7f64d44735f9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -33,7 +33,7 @@ #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 - #define PT_MAX_FULL_LEVELS 4 + #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #define CMPXCHG cmpxchg #else #define CMPXCHG cmpxchg64 @@ -220,7 +220,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte) static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, - int write_fault) + gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; @@ -245,7 +245,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT - if (kvm_arch_write_log_dirty(vcpu)) + if (kvm_arch_write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; @@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) } /* - * Fetch a guest pte for a guest virtual address + * Fetch a guest pte for a guest virtual address, or for an L2's GPA. */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, u32 access) + gpa_t addr, u32 access) { int ret; pt_element_t pte; @@ -442,7 +442,8 @@ retry_walk: (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, + addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) @@ -496,7 +497,7 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -610,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. */ -static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, struct guest_walker *gw, int write_fault, int hlevel, kvm_pfn_t pfn, bool map_writable, bool prefault, @@ -763,7 +764,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. */ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, bool prefault) { int write_fault = error_code & PFERR_WRITE_MASK; @@ -945,18 +946,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) spin_unlock(&vcpu->kvm->mmu_lock); } -static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, +/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); + r = FNAME(walk_addr)(&walker, vcpu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; + gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; @@ -964,7 +966,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, } #if PTTYPE != PTTYPE_EPT -static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, +/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */ +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -972,6 +975,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; +#ifndef CONFIG_X86_64 + /* A 64-bit GVA should be impossible on 32-bit KVM. */ + WARN_ON_ONCE(vaddr >> 32); +#endif + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); if (r) { diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 22dff661145a..7b4828e50ab3 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_PMU_H #define __KVM_X86_PMU_H +#include <linux/nospec.h> + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) @@ -86,8 +88,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, u32 base) { - if (msr >= base && msr < base + pmu->nr_arch_gp_counters) - return &pmu->gp_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_gp_counters); + + return &pmu->gp_counters[index]; + } return NULL; } @@ -97,8 +103,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) { int base = MSR_CORE_PERF_FIXED_CTR0; - if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) - return &pmu->fixed_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_fixed_counters); + + return &pmu->fixed_counters[index]; + } return NULL; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ee7db075dd4d..bf0d805e75c9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -732,8 +732,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu->arch.efer = efer; - if (!npt_enabled && !(efer & EFER_LMA)) - efer &= ~EFER_LME; + + if (!npt_enabled) { + /* Shadow paging assumes NX to be available. */ + efer |= EFER_NX; + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + } to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); @@ -990,33 +996,32 @@ static void svm_cpu_uninit(int cpu) static int svm_cpu_init(int cpu) { struct svm_cpu_data *sd; - int r; sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); if (!sd) return -ENOMEM; sd->cpu = cpu; - r = -ENOMEM; sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) - goto err_1; + goto free_cpu_data; if (svm_sev_enabled()) { - r = -ENOMEM; sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) - goto err_1; + goto free_save_area; } per_cpu(svm_data, cpu) = sd; return 0; -err_1: +free_save_area: + __free_page(sd->save_area); +free_cpu_data: kfree(sd); - return r; + return -ENOMEM; } @@ -1290,6 +1295,47 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) control->pause_filter_count, old); } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1344,6 +1390,8 @@ static __init int svm_hardware_setup(void) } } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -1805,7 +1853,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); + npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; @@ -1870,6 +1918,10 @@ static struct kvm *svm_vm_alloc(void) struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm), GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); + + if (!kvm_svm) + return NULL; + return &kvm_svm->kvm; } @@ -2134,12 +2186,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!svm->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_svm; + } + svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!svm->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_svm; + goto free_user_fpu; } err = kvm_vcpu_init(&svm->vcpu, kvm, id); @@ -2202,6 +2262,8 @@ uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); free_partial_svm: kmem_cache_free(kvm_vcpu_cache, svm); out: @@ -2232,6 +2294,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -3165,8 +3228,8 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_HOST; break; case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) + /* Trap async PF even if not shadowing */ + if (!npt_enabled || svm->vcpu.arch.apf.host_apf_reason) return NESTED_EXIT_HOST; break; default: @@ -3255,7 +3318,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr dst->iopm_base_pa = from->iopm_base_pa; dst->msrpm_base_pa = from->msrpm_base_pa; dst->tsc_offset = from->tsc_offset; - dst->asid = from->asid; + /* asid not copied, it is handled manually for svm->vmcb. */ dst->tlb_ctl = from->tlb_ctl; dst->int_ctl = from->int_ctl; dst->int_vector = from->int_vector; @@ -4531,9 +4594,9 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) */ kvm_for_each_vcpu(i, vcpu, kvm) { bool m = kvm_apic_match_dest(vcpu, apic, - icrl & KVM_APIC_SHORT_MASK, + icrl & APIC_SHORT_MASK, GET_APIC_DEST_FIELD(icrh), - icrl & KVM_APIC_DEST_MASK); + icrl & APIC_DEST_MASK); if (m && !avic_vcpu_is_running(vcpu)) kvm_vcpu_wake_up(vcpu); @@ -5155,8 +5218,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -5165,6 +5231,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) @@ -5899,6 +5967,9 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVES); + /* Update nrips enabled cache */ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); @@ -5985,6 +6056,11 @@ static bool svm_has_wbinvd_exit(void) return true; } +static bool svm_pku_supported(void) +{ + return false; +} + #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -7274,6 +7350,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .xsaves_supported = svm_xsaves_supported, .umip_emulated = svm_umip_emulated, .pt_supported = svm_pt_supported, + .pku_supported = svm_pku_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4d47a2631d1f..e11689b876d7 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1462,6 +1462,25 @@ TRACE_EVENT(kvm_hv_send_ipi_ex, __entry->vector, __entry->format, __entry->valid_bank_mask) ); + +TRACE_EVENT(kvm_pv_tlb_flush, + TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb), + TP_ARGS(vcpu_id, need_flush_tlb), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( bool, need_flush_tlb ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->need_flush_tlb = need_flush_tlb; + ), + + TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id, + __entry->need_flush_tlb ? "true" : "false") +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d6664ee3d127..7cd3f8683b8c 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; +extern bool __read_mostly enable_apicv; extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 @@ -145,6 +146,11 @@ static inline bool vmx_umip_emulated(void) SECONDARY_EXEC_DESC; } +static inline bool vmx_pku_supported(void) +{ + return boot_cpu_has(X86_FEATURE_PKU); +} + static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 22437ad335b0..c6deeed854f1 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2384,6 +2384,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return -EINVAL; + /* + * Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12 + * on nested VM-Exit, which can occur without actually running L2 and + * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with + * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the + * transition to HLT instead of running L2. + */ + if (enable_ept) + vmcs_writel(GUEST_CR3, vmcs12->guest_cr3); + if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; @@ -3357,7 +3367,7 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); } -static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) +static int vmx_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; @@ -3395,8 +3405,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && - nested_exit_on_intr(vcpu)) { + if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) { if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); @@ -3970,17 +3979,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - /* - * TODO: SDM says that with acknowledge interrupt on - * exit, bit 31 of the VM-exit interrupt information - * (valid interrupt) is always set to 1 on - * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't - * need kvm_cpu_has_interrupt(). See the commit - * message for details. - */ - if (nested_exit_intr_ack_set(vcpu) && - exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && - kvm_cpu_has_interrupt(vcpu)) { + if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); vmcs12->vm_exit_intr_info = irq | @@ -4452,8 +4452,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) vmx_instruction_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } return nested_vmx_succeed(vcpu); @@ -4882,24 +4884,17 @@ fail: return 1; } - -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. + */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; @@ -4926,6 +4921,24 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, return false; } +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned short port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Return 1 if we should exit from L2 to L1 to handle an MSR access access, * rather than handle it ourselves in L0. I.e., check whether L1 expressed @@ -5112,7 +5125,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); - switch (exit_reason) { + switch ((u16)exit_reason) { case EXIT_REASON_EXCEPTION_NMI: if (is_nmi(intr_info)) return false; @@ -5546,8 +5559,7 @@ void nested_vmx_vcpu_setup(void) * bit in the high half is on if the corresponding bit in the control field * may be on. See also vmx_control_verify(). */ -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, - bool apicv) +void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) { /* * Note that as a general rule, the high half of the MSRs (bits in @@ -5574,7 +5586,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | - (apicv ? PIN_BASED_POSTED_INTR : 0); + (enable_apicv ? PIN_BASED_POSTED_INTR : 0); msrs->pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 29d205bb4e4f..161c305de3f3 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -7,8 +7,7 @@ #include "vmx.h" void vmx_leave_nested(struct kvm_vcpu *vcpu); -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, - bool apicv); +void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_vcpu_setup(void); @@ -22,6 +21,8 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size); static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 68d231d49c7a..cb3d9872a88d 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -84,10 +84,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ @@ -128,16 +132,20 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) - return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index d4cb1945b2e3..f83368b5af87 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -86,6 +86,9 @@ ENTRY(vmx_vmexit) /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ + or $1, %_ASM_AX + pop %_ASM_AX .Lvmexit_skip_rsb: #endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e5fdd5f14c8e..b177e25e3786 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv = 1; +bool __read_mostly enable_apicv = 1; module_param(enable_apicv, bool, S_IRUGO); /* @@ -402,6 +402,7 @@ const u32 vmx_msr_index[] = { MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, }; #if IS_ENABLED(CONFIG_HYPERV) @@ -565,6 +566,23 @@ struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) return NULL; } +static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data) +{ + int ret = 0; + + u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); + preempt_enable(); + if (ret) + msr->data = old_msr_data; + } + return ret; +} + void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); @@ -575,43 +593,15 @@ void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) } #ifdef CONFIG_KEXEC_CORE -/* - * This bitmap is used to indicate whether the vmclear - * operation is enabled on all cpus. All disabled by - * default. - */ -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; - -static inline void crash_enable_local_vmclear(int cpu) -{ - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline void crash_disable_local_vmclear(int cpu) -{ - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - -static inline int crash_local_vmclear_enabled(int cpu) -{ - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); -} - static void crash_vmclear_local_loaded_vmcss(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; - if (!crash_local_vmclear_enabled(cpu)) - return; - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), loaded_vmcss_on_cpu_link) vmcs_clear(v->vmcs); } -#else -static inline void crash_enable_local_vmclear(int cpu) { } -static inline void crash_disable_local_vmclear(int cpu) { } #endif /* CONFIG_KEXEC_CORE */ static void __loaded_vmcs_clear(void *arg) @@ -623,19 +613,24 @@ static void __loaded_vmcs_clear(void *arg) return; /* vcpu migration can race with cpu offline */ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) per_cpu(current_vmcs, cpu) = NULL; - crash_disable_local_vmclear(cpu); + + vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); + list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); /* - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link - * is before setting loaded_vmcs->vcpu to -1 which is done in - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist - * then adds the vmcs into percpu list before it is deleted. + * Ensure all writes to loaded_vmcs, including deleting it from its + * current percpu list, complete before setting loaded_vmcs->vcpu to + * -1, otherwise a different cpu can see vcpu == -1 first and add + * loaded_vmcs to its percpu list before it's deleted from this cpu's + * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). */ smp_wmb(); - loaded_vmcs_init(loaded_vmcs); - crash_enable_local_vmclear(cpu); + loaded_vmcs->cpu = -1; + loaded_vmcs->launched = 0; } void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) @@ -896,17 +891,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) u64 guest_efer = vmx->vcpu.arch.efer; u64 ignore_bits = 0; - if (!enable_ept) { - /* - * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing - * host CPUID is more efficient than testing guest CPUID - * or CR4. Host SMEP is anyway a requirement for guest SMEP. - */ - if (boot_cpu_has(X86_FEATURE_SMEP)) - guest_efer |= EFER_NX; - else if (!(guest_efer & EFER_NX)) - ignore_bits |= EFER_NX; - } + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; /* * LMA and LME handled by hardware; SCE meaningless outside long mode. @@ -1237,18 +1224,17 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!already_loaded) { loaded_vmcs_clear(vmx->loaded_vmcs); local_irq_disable(); - crash_disable_local_vmclear(cpu); /* - * Read loaded_vmcs->cpu should be before fetching - * loaded_vmcs->loaded_vmcss_on_cpu_link. - * See the comments in __loaded_vmcs_clear(). + * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to + * this cpu's percpu list, otherwise it may not yet be deleted + * from its previous cpu's percpu list. Pairs with the + * smb_wmb() in __loaded_vmcs_clear(). */ smp_rmb(); list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, &per_cpu(loaded_vmcss_on_cpu, cpu)); - crash_enable_local_vmclear(cpu); local_irq_enable(); } @@ -1292,7 +1278,6 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) decache_tsc_multiplier(vmx); vmx_vcpu_pi_load(vcpu, cpu); - vmx->host_pkru = read_pkru(); vmx->host_debugctlmsr = get_debugctlmsr(); } @@ -1577,6 +1562,9 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_TSC_AUX); if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL); + if (index >= 0) + move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; vmx->guest_msrs_dirty = true; @@ -1676,6 +1664,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + goto find_shared_msr; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1718,10 +1711,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data); case MSR_IA32_XSS: - if (!vmx_xsaves_supported() || - (!msr_info->host_initiated && - !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) return 1; msr_info->data = vcpu->arch.ia32_xss; break; @@ -1775,8 +1766,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; - /* Else, falls through */ + goto find_shared_msr; default: + find_shared_msr: msr = find_msr_entry(vmx, msr_info->index); if (msr) { msr_info->data = msr->data; @@ -1871,6 +1863,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); break; + case MSR_IA32_TSX_CTRL: + if (!msr_info->host_initiated && + !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR)) + return 1; + if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR)) + return 1; + goto find_shared_msr; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1936,10 +1935,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); case MSR_IA32_XSS: - if (!vmx_xsaves_supported() || - (!msr_info->host_initiated && - !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) return 1; /* * The only supported bit as of Skylake is bit 8, but @@ -1948,11 +1945,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data != 0) return 1; vcpu->arch.ia32_xss = data; - if (vcpu->arch.ia32_xss != host_xss) - add_atomic_switch_msr(vmx, MSR_IA32_XSS, - vcpu->arch.ia32_xss, host_xss, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + if (vcpu->arch.xsaves_enabled) { + if (vcpu->arch.ia32_xss != host_xss) + add_atomic_switch_msr(vmx, MSR_IA32_XSS, + vcpu->arch.ia32_xss, host_xss, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_XSS); + } break; case MSR_IA32_RTIT_CTL: if ((pt_mode != PT_MODE_HOST_GUEST) || @@ -2006,6 +2005,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_num_address_ranges))) return 1; + if (is_noncanonical_address(data, vcpu)) + return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; else @@ -2018,23 +2019,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) return 1; - /* Else, falls through */ + goto find_shared_msr; + default: + find_shared_msr: msr = find_msr_entry(vmx, msr_index); - if (msr) { - u64 old_msr_data = msr->data; - msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) { - preempt_disable(); - ret = kvm_set_shared_msr(msr->index, msr->data, - msr->mask); - preempt_enable(); - if (ret) - msr->data = old_msr_data; - } - break; - } - ret = kvm_set_msr_common(vcpu, msr_info); + if (msr) + ret = vmx_set_guest_msr(vmx, msr, data); + else + ret = kvm_set_msr_common(vcpu, msr_info); } return ret; @@ -2116,21 +2109,6 @@ static int hardware_enable(void) !hv_get_vp_assist_page(cpu)) return -EFAULT; - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - - /* - * Now we can enable the vmclear operation in kdump - * since the loaded_vmcss_on_cpu list on this cpu - * has been initialized. - * - * Though the cpu is not in VMX operation now, there - * is no problem to enable the vmclear operation - * for the loaded_vmcss_on_cpu list is empty! - */ - crash_enable_local_vmclear(cpu); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); test_bits = FEATURE_CONTROL_LOCKED; @@ -2837,6 +2815,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; @@ -2859,6 +2840,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { struct kvm *kvm = vcpu->kvm; + bool update_guest_cr3 = true; unsigned long guest_cr3; u64 eptp; @@ -2875,15 +2857,17 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); } - if (enable_unrestricted_guest || is_paging(vcpu) || - is_guest_mode(vcpu)) - guest_cr3 = kvm_read_cr3(vcpu); - else + if (!enable_unrestricted_guest && !is_paging(vcpu)) guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; + else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + guest_cr3 = vcpu->arch.cr3; + else /* vmcs01.GUEST_CR3 is already up-to-date. */ + update_guest_cr3 = false; ept_load_pdptrs(vcpu); } - vmcs_writel(GUEST_CR3, guest_cr3); + if (update_guest_cr3) + vmcs_writel(GUEST_CR3, guest_cr3); } int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -3708,24 +3692,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do. */ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* @@ -3796,6 +3785,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) { + BUILD_BUG_ON(KVM_CR4_GUEST_OWNED_BITS & ~KVM_POSSIBLE_CR4_GUEST_BITS); + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; if (enable_ept) vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; @@ -3912,6 +3903,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); + vcpu->arch.xsaves_enabled = xsaves_enabled; + if (!xsaves_enabled) exec_control &= ~SECONDARY_EXEC_XSAVES; @@ -4001,7 +3994,7 @@ static void ept_set_mmio_spte_mask(void) * of an EPT paging-structure entry is 110b (write/execute). */ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE); + VMX_EPT_MISCONFIG_WX_VALUE, 0); } #define VMX_XSS_EXIT_BITMAP 0 @@ -4084,7 +4077,20 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) continue; vmx->guest_msrs[j].index = i; vmx->guest_msrs[j].data = 0; - vmx->guest_msrs[j].mask = -1ull; + + switch (index) { + case MSR_IA32_TSX_CTRL: + /* + * No need to pass TSX_CTRL_CPUID_CLEAR through, so + * let's avoid changing CPUID bits under the host + * kernel's feet. + */ + vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + break; + default: + vmx->guest_msrs[j].mask = -1ull; + break; + } ++vmx->nmsrs; } @@ -4342,8 +4348,13 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - return (!to_vmx(vcpu)->nested.nested_run_pending && - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + if (to_vmx(vcpu)->nested.nested_run_pending) + return false; + + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return true; + + return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } @@ -4437,7 +4448,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, */ static void kvm_machine_check(void) { -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) +#if defined(CONFIG_X86_MCE) struct pt_regs regs = { .cs = 3, /* Fake ring 3 no matter what the guest ran on */ .flags = X86_EFLAGS_IF, @@ -6098,7 +6109,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) { - return pi_test_on(vcpu_to_pi_desc(vcpu)); + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + return pi_test_on(pi_desc) || + (pi_test_sn(pi_desc) && + !bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS)); } static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -6437,11 +6452,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_load_guest_xcr0(vcpu); - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); @@ -6521,18 +6531,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_exit(vmx); - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - kvm_put_guest_xcr0(vcpu); vmx->nested.nested_run_pending = 0; @@ -6558,6 +6556,10 @@ static struct kvm *vmx_vm_alloc(void) struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); + + if (!kvm_vmx) + return NULL; + return &kvm_vmx->kvm; } @@ -6577,6 +6579,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, vmx); } @@ -6592,12 +6595,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx) return ERR_PTR(-ENOMEM); + vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vmx->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_vcpu; + } + vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!vmx->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_vcpu; + goto free_user_fpu; } vmx->vpid = allocate_vpid(); @@ -6662,8 +6673,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, - vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); + vmx_capability.ept); else memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); @@ -6694,6 +6704,8 @@ uninit_vcpu: free_vcpu: free_vpid(vmx->vpid); kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); free_partial_vcpu: kmem_cache_free(kvm_vcpu_cache, vmx); return ERR_PTR(err); @@ -6744,8 +6756,7 @@ static void __init vmx_check_processor_compat(void *rtn) if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) *(int *)rtn = -EIO; if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, - enable_apicv); + nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); @@ -6957,6 +6968,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ + vcpu->arch.xsaves_enabled = false; + if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); vmcs_set_secondary_exec_control(vmx->secondary_exec_control); @@ -6977,6 +6991,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_INTEL_PT) && guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) update_intel_pt_cfg(vcpu); + + if (boot_cpu_has(X86_FEATURE_RTM)) { + struct shared_msr_entry *msr; + msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL); + if (msr) { + bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM); + vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE); + } + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -6990,6 +7013,40 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) to_vmx(vcpu)->req_immediate_exit = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -6997,19 +7054,45 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + switch (info->intercept) { /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); + + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; + default: + break; + } + + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 @@ -7104,11 +7187,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm) kvm_flush_pml_buffers(kvm); } -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; + gpa_t dst; if (is_guest_mode(vcpu)) { WARN_ON_ONCE(vmx->nested.pml_full); @@ -7127,7 +7210,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) return 1; } - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + gpa &= ~0xFFFull; dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, @@ -7568,7 +7651,7 @@ static __init int hardware_setup(void) if (nested) { nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept, enable_apicv); + vmx_capability.ept); r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); if (r) @@ -7702,6 +7785,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, .pt_supported = vmx_pt_supported, + .pku_supported = vmx_pku_supported, .request_immediate_exit = vmx_request_immediate_exit, @@ -7788,7 +7872,7 @@ module_exit(vmx_exit); static int __init vmx_init(void) { - int r; + int r, cpu; #if IS_ENABLED(CONFIG_HYPERV) /* @@ -7839,6 +7923,12 @@ static int __init vmx_init(void) } } + for_each_possible_cpu(cpu) { + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + } + #ifdef CONFIG_KEXEC_CORE rcu_assign_pointer(crash_vmclear_loaded_vmcss, crash_vmclear_local_loaded_vmcss); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 61128b48c503..6935eff28122 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -256,8 +256,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - u32 host_pkru; - unsigned long host_debugctlmsr; u64 msr_ia32_power_ctl; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cfd880ab841d..df6f6df902c6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -90,6 +90,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif +static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; + #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ #define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ @@ -295,13 +297,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); int err; - if (((value ^ smsr->values[slot].curr) & mask) == 0) + value = (value & mask) | (smsr->values[slot].host & ~mask); + if (value == smsr->values[slot].curr) return 0; - smsr->values[slot].curr = value; err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); if (err) return 1; + smsr->values[slot].curr = value; if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); @@ -437,6 +440,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) * for #DB exceptions under VMX. */ vcpu->arch.dr6 ^= payload & DR6_RTM; + + /* + * The #DB payload is defined as compatible with the 'pending + * debug exceptions' field under VMX, not DR6. While bit 12 is + * defined in the 'pending debug exceptions' field (enabled + * breakpoint), it is reserved and must be zero in DR6. + */ + vcpu->arch.dr6 &= ~BIT(12); break; case PF_VECTOR: vcpu->arch.cr2 = payload; @@ -816,11 +827,25 @@ void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); vcpu->guest_xcr0_loaded = 1; } + + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) && + vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.pkru); } EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { + vcpu->arch.pkru = rdpkru(); + if (vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.host_pkru); + } + if (vcpu->guest_xcr0_loaded) { if (vcpu->arch.xcr0 != host_xcr0) xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); @@ -880,9 +905,38 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); +static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) +{ + u64 reserved_bits = CR4_RESERVED_BITS; + + if (!cpu_has(c, X86_FEATURE_XSAVE)) + reserved_bits |= X86_CR4_OSXSAVE; + + if (!cpu_has(c, X86_FEATURE_SMEP)) + reserved_bits |= X86_CR4_SMEP; + + if (!cpu_has(c, X86_FEATURE_SMAP)) + reserved_bits |= X86_CR4_SMAP; + + if (!cpu_has(c, X86_FEATURE_FSGSBASE)) + reserved_bits |= X86_CR4_FSGSBASE; + + if (!cpu_has(c, X86_FEATURE_PKU)) + reserved_bits |= X86_CR4_PKE; + + if (!cpu_has(c, X86_FEATURE_LA57) && + !(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57))) + reserved_bits |= X86_CR4_LA57; + + if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated()) + reserved_bits |= X86_CR4_UMIP; + + return reserved_bits; +} + static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - if (cr4 & CR4_RESERVED_BITS) + if (cr4 & cr4_reserved_bits) return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) @@ -921,6 +975,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) return 1; + if ((cr4 ^ old_cr4) & X86_CR4_LA57) + return 1; } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, @@ -1048,9 +1104,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -1087,9 +1145,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ @@ -1250,27 +1310,18 @@ u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; /* - * On TAA affected systems, export MDS_NO=0 when: - * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. - * - Updated microcode is present. This is detected by - * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures - * that VERW clears CPU buffers. - * - * When MDS_NO=0 is exported, guests deploy clear CPU buffer - * mitigation and don't complain: - * - * "Vulnerable: Clear CPU buffers attempted, no microcode" - * - * If TSX is disabled on the system, guests are also mitigated against - * TAA and clear CPU buffer mitigation is not required for guests. + * On TAA affected systems: + * - nothing to do if TSX is disabled on the host. + * - we emulate TSX_CTRL if present on the host. + * This lets the guest use VERW to clear CPU buffers. */ - if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && - (data & ARCH_CAP_TSX_CTRL_MSR)) - data &= ~ARCH_CAP_MDS_NO; + if (!boot_cpu_has(X86_FEATURE_RTM)) + data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR); + else if (!boot_cpu_has_bug(X86_BUG_TAA)) + data |= ARCH_CAP_TAA_NO; return data; } -EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { @@ -2372,7 +2423,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2467,43 +2521,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) static void record_steal_time(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) + /* -EAGAIN is returned in atomic context so we can just return. */ + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, + &map, &vcpu->arch.st.cache, false)) return; + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ - if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, + st->preempted & KVM_VCPU_FLUSH_TLB); + if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb(vcpu, false); - if (vcpu->arch.st.steal.version & 1) - vcpu->arch.st.steal.version += 1; /* first time write, random junk */ + vcpu->arch.st.preempted = 0; - vcpu->arch.st.steal.version += 1; + if (st->version & 1) + st->version += 1; /* first time write, random junk */ - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + st->version += 1; smp_wmb(); - vcpu->arch.st.steal.steal += current->sched_info.run_delay - + st->steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); - smp_wmb(); - vcpu->arch.st.steal.version += 1; + st->version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -2570,7 +2628,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: return kvm_set_apic_base(vcpu, msr_info); - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSCDEADLINE: kvm_set_lapic_tscdeadline_msr(vcpu, data); @@ -2648,11 +2706,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS, - sizeof(struct kvm_steal_time))) - return 1; - vcpu->arch.st.msr_val = data; if (!(data & KVM_MSR_ENABLED)) @@ -2792,7 +2845,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } @@ -2869,7 +2925,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_APICBASE: msr_info->data = kvm_get_apic_base(vcpu); break; - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); break; case MSR_IA32_TSCDEADLINE: @@ -3305,9 +3361,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); + /* Save host pkru register if supported */ + vcpu->arch.host_pkru = read_pkru(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { @@ -3348,15 +3403,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; + if (vcpu->arch.st.preempted) + return; - kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal.preempted, - offsetof(struct kvm_steal_time, preempted), - sizeof(vcpu->arch.st.steal.preempted)); + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, + &vcpu->arch.st.cache, true)) + return; + + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + + st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -3492,7 +3557,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) + if (!bank_num || bank_num > KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) goto out; @@ -4285,6 +4350,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_NESTED_STATE: { struct kvm_nested_state __user *user_kvm_nested_state = argp; struct kvm_nested_state kvm_state; + int idx; r = -EINVAL; if (!kvm_x86_ops->set_nested_state) @@ -4308,7 +4374,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE)) break; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_GET_SUPPORTED_HV_CPUID: { @@ -6214,11 +6282,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return r; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool write_fault_to_shadow_pgtable, int emulation_type) { - gpa_t gpa = cr2; + gpa_t gpa = cr2_or_gpa; kvm_pfn_t pfn; if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) @@ -6232,7 +6300,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, * Write permission should be allowed since only * write access need to be emulated. */ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); /* * If the mapping is invalid in guest, let cpu retry @@ -6289,10 +6357,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - unsigned long cr2, int emulation_type) + gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2; + unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; last_retry_eip = vcpu->arch.last_retry_eip; last_retry_addr = vcpu->arch.last_retry_addr; @@ -6321,14 +6389,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (x86_page_table_writing_insn(ctxt)) return false; - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) + if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) return false; vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2; + vcpu->arch.last_retry_addr = cr2_or_gpa; if (!vcpu->arch.mmu->direct_map) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); @@ -6472,11 +6540,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, - int emulation_type, - void *insn, - int insn_len) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len) { int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; @@ -6519,8 +6584,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (r != EMULATION_OK) { if (emulation_type & EMULTYPE_TRAP_UD) return EMULATE_FAIL; - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, - emulation_type)) + if (reexecute_instruction(vcpu, cr2_or_gpa, + write_fault_to_spt, + emulation_type)) return EMULATE_DONE; if (ctxt->have_exception) { /* @@ -6549,7 +6615,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } - if (retry_instruction(ctxt, cr2, emulation_type)) + if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) return EMULATE_DONE; /* this is needed for vmware backdoor interface to work since it @@ -6561,7 +6627,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: /* Save the faulting GPA (cr2) in the address field */ - ctxt->exception.address = cr2; + ctxt->exception.address = cr2_or_gpa; r = x86_emulate_insn(ctxt); @@ -6569,7 +6635,7 @@ restart: return EMULATE_DONE; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, emulation_type)) return EMULATE_DONE; @@ -6606,7 +6672,7 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && ctxt->tf) + if (r == EMULATE_DONE && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) kvm_vcpu_do_singlestep(vcpu, &r); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -6968,33 +7034,25 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { .handle_intel_pt_intr = kvm_handle_intel_pt_intr, }; +extern u8 __read_mostly shadow_phys_bits; + static void kvm_set_mmio_spte_mask(void) { u64 mask; - int maxphyaddr = boot_cpu_data.x86_phys_bits; - - /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. - */ /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. + * Set a reserved PA bit in MMIO SPTEs to generate page faults with + * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT + * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports + * 52-bit physical addresses then there are no reserved PA bits in the + * PTEs and so the reserved PA approach must be disabled. */ - mask = 1ull << 51; - - /* Set the present bit. */ - mask |= 1ull; - - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. - */ - if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) - mask &= ~1ull; + if (shadow_phys_bits < 52) + mask = BIT_ULL(51) | PT_PRESENT_MASK; + else + mask = 0; - kvm_mmu_set_mmio_spte_mask(mask, mask); + kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); } #ifdef CONFIG_X86_64 @@ -7350,7 +7408,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); } -static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) +static int inject_pending_event(struct kvm_vcpu *vcpu) { int r; @@ -7386,7 +7444,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) * from L2 to L1. */ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + r = kvm_x86_ops->check_nested_events(vcpu); if (r != 0) return r; } @@ -7448,7 +7506,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) * KVM_REQ_EVENT only on certain events and not unconditionally? */ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { - r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + r = kvm_x86_ops->check_nested_events(vcpu); if (r != 0) return r; } @@ -7759,9 +7817,8 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, - bool blockable) +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { unsigned long apic_address; @@ -7772,8 +7829,6 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (start <= apic_address && apic_address < end) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); - - return 0; } void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) @@ -7921,7 +7976,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } - if (inject_pending_event(vcpu, req_int_win) != 0) + if (inject_pending_event(vcpu) != 0) req_immediate_exit = true; else { /* Enable SMI/NMI/IRQ window open exits if needed. @@ -8016,8 +8071,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) wait_lapic_expire(vcpu); guest_enter_irqoff(); - /* The preempt notifier should have taken care of the FPU already. */ - WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8137,7 +8193,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) - kvm_x86_ops->check_nested_events(vcpu, false); + kvm_x86_ops->check_nested_events(vcpu); return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted); @@ -8274,12 +8330,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +static void kvm_save_current_fpu(struct fpu *fpu) +{ + /* + * If the target FPU state is not resident in the CPU registers, just + * memcpy() from current, else save CPU state directly to the target. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&fpu->state, ¤t->thread.fpu.state, + fpu_kernel_xstate_size); + else + copy_fpregs_to_fpstate(fpu); +} + /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); + kvm_save_current_fpu(vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8295,8 +8365,9 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); - copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + kvm_save_current_fpu(vcpu->arch.guest_fpu); + + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); fpregs_unlock(); @@ -8517,6 +8588,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { vcpu_load(vcpu); + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && @@ -8525,6 +8598,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return 0; } @@ -8881,6 +8956,9 @@ static void fx_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; + struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; + + kvm_release_pfn(cache->pfn, cache->dirty, cache); kvmclock_reset(vcpu); @@ -8941,13 +9019,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - vcpu->arch.apf.msr_val = 0; - - vcpu_load(vcpu); - kvm_mmu_unload(vcpu); - vcpu_put(vcpu); - - kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_free(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -9139,6 +9211,8 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); + if (kvm_has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9480,6 +9554,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, { int i; + /* + * Clear out the previous array pointers for the KVM_MR_MOVE case. The + * old arrays will be freed by __kvm_set_memory_region() if installing + * the new memslot is successful. + */ + memset(&slot->arch, 0, sizeof(slot->arch)); + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { struct kvm_lpage_info *linfo; unsigned long ugfn; @@ -9542,11 +9623,18 @@ out_free: void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { + struct kvm_vcpu *vcpu; + int i; + /* * memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ kvm_mmu_invalidate_mmio_sptes(kvm, gen); + + /* Force re-initialization of steal_time cache */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -9554,6 +9642,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { + if (change == KVM_MR_MOVE) + return kvm_arch_create_memslot(kvm, memslot, + mem->memory_size >> PAGE_SHIFT); + return 0; } @@ -9793,7 +9885,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); + vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) @@ -9876,7 +9968,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_not_present(work->arch.token, work->gva); + trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); kvm_add_async_pf_gfn(vcpu, work->arch.gfn); if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || @@ -9904,7 +9996,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); - trace_kvm_async_pf_ready(work->arch.token, work->gva); + trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && !apf_get_user(vcpu, &val)) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a470ff0868c5..8c0b13cc3c6a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -281,7 +281,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index e0e006f1624e..67a91fd38eef 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" + " .align 16\n" "0: movq $0,(%[dst])\n" " addq $8,%[dst]\n" " decl %%ecx ; jnz 0b\n" diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index e0b85930dd77..5cb9f009f2be 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -333,7 +333,7 @@ AVXcode: 1 06: CLTS 07: SYSRET (o64) 08: INVD -09: WBINVD +09: WBINVD | WBNOINVD (F3) 0a: 0b: UD2 (1B) 0c: @@ -364,7 +364,7 @@ AVXcode: 1 # a ModR/M byte. 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv -1c: +1c: Grp20 (1A),(1C) 1d: 1e: 1f: NOP Ev @@ -792,6 +792,8 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) +f9: MOVDIRI My,Gy EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -907,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev @@ -943,9 +945,9 @@ GrpTable: Grp6 EndTable GrpTable: Grp7 -0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: rdpkru (110),(11B) | wrpkru (111),(11B) @@ -1020,7 +1022,7 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1051,6 +1053,10 @@ GrpTable: Grp19 6: vscatterpf1qps/d Wx (66),(ev) EndTable +GrpTable: Grp20 +0: cldemote Mb +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index f98a0c956764..9b41391867dc 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -107,6 +107,8 @@ static inline bool seg_writable(struct desc_struct *d) #define FPU_access_ok(y,z) if ( !access_ok(y,z) ) \ math_abort(FPU_info,SIGSEGV) #define FPU_abort math_abort(FPU_info, SIGSEGV) +#define FPU_copy_from_user(to, from, n) \ + do { if (copy_from_user(to, from, n)) FPU_abort; } while (0) #undef FPU_IGNORE_CODE_SEGV #ifdef FPU_IGNORE_CODE_SEGV @@ -122,7 +124,7 @@ static inline bool seg_writable(struct desc_struct *d) #define FPU_code_access_ok(z) FPU_access_ok((void __user *)FPU_EIP,z) #endif -#define FPU_get_user(x,y) get_user((x),(y)) -#define FPU_put_user(x,y) put_user((x),(y)) +#define FPU_get_user(x,y) do { if (get_user((x),(y))) FPU_abort; } while (0) +#define FPU_put_user(x,y) do { if (put_user((x),(y))) FPU_abort; } while (0) #endif diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index f3779743d15e..fe6246ff9887 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -85,7 +85,7 @@ int FPU_load_extended(long double __user *s, int stnr) RE_ENTRANT_CHECK_OFF; FPU_access_ok(s, 10); - __copy_from_user(sti_ptr, s, 10); + FPU_copy_from_user(sti_ptr, s, 10); RE_ENTRANT_CHECK_ON; return FPU_tagof(sti_ptr); @@ -1126,9 +1126,9 @@ void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) /* Copy all registers in stack order. */ RE_ENTRANT_CHECK_OFF; FPU_access_ok(s, 80); - __copy_from_user(register_base + offset, s, other); + FPU_copy_from_user(register_base + offset, s, other); if (offset) - __copy_from_user(register_base, s + other, offset); + FPU_copy_from_user(register_base, s + other, offset); RE_ENTRANT_CHECK_ON; for (i = 0; i < 8; i++) { diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S index f031c0e19356..515cdee90df7 100644 --- a/arch/x86/math-emu/wm_sqrt.S +++ b/arch/x86/math-emu/wm_sqrt.S @@ -209,7 +209,7 @@ sqrt_stage_2_finish: #ifdef PARANOID /* It should be possible to get here only if the arg is ffff....ffff */ - cmp $0xffffffff,FPU_fsqrt_arg_1 + cmpl $0xffffffff,FPU_fsqrt_arg_1 jnz sqrt_stage_2_error #endif /* PARANOID */ diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 752ad11d6868..d9643647a9ce 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -178,7 +178,9 @@ static __init void setup_cpu_entry_area_ptes(void) #ifdef CONFIG_X86_32 unsigned long start, end; - BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + /* The +1 is for the readonly IDT: */ + BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); + BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 26a8b4b1b9ed..7aa188950d42 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -206,7 +206,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -214,7 +214,7 @@ void vmalloc_sync_all(void) return; for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE_MAX && address < FIXADDR_TOP; + address >= TASK_SIZE_MAX && address < VMALLOC_END; address += PMD_SIZE) { struct page *page; @@ -233,6 +233,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -335,11 +345,23 @@ out: #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. + */ +} + /* * 64-bit: * @@ -1500,9 +1522,8 @@ good_area: NOKPROBE_SYMBOL(do_user_addr_fault); /* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. + * Explicitly marked noinline such that the function tracer sees this as the + * page_fault entry point. */ static noinline void __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, @@ -1521,33 +1542,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, } NOKPROBE_SYMBOL(__do_page_fault); -static nokprobe_inline void -trace_page_fault_entries(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static __always_inline void +trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { + if (!trace_pagefault_enabled()) + return; + if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); else trace_page_fault_kernel(address, regs, error_code); } -/* - * We must have this function blacklisted from kprobes, tagged with notrace - * and call read_cr2() before calling anything else. To avoid calling any - * kind of tracing machinery before we've observed the CR2 value. - * - * exception_{enter,exit}() contains all sorts of tracepoints. - */ -dotraplinkage void notrace -do_page_fault(struct pt_regs *regs, unsigned long error_code) +dotraplinkage void +do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - unsigned long address = read_cr2(); /* Get the faulting address */ enum ctx_state prev_state; prev_state = exception_enter(); - if (trace_pagefault_enabled()) - trace_page_fault_entries(address, regs, error_code); - + trace_page_fault_entries(regs, error_code, address); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd10d91a6115..af352e228fa2 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -121,8 +121,6 @@ __ref void *alloc_low_pages(unsigned int num) } else { pfn = pgt_buf_end; pgt_buf_end += num; - printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", - pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); } for (i = 0; i < num; i++) { diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index b8ef8557d4b3..2a36902d418c 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -372,7 +372,7 @@ static void enter_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL && + if (!cpumask_available(downed_cpus) && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { pr_notice("Failed to allocate mask\n"); goto out; @@ -402,7 +402,7 @@ static void leave_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) + if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0) return; pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e14e95ea7338..7155b7e0899e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -42,7 +42,8 @@ struct cpa_data { unsigned long pfn; unsigned int flags; unsigned int force_split : 1, - force_static_prot : 1; + force_static_prot : 1, + force_flush_all : 1; struct page **pages; }; @@ -352,10 +353,10 @@ static void cpa_flush(struct cpa_data *data, int cache) return; } - if (cpa->numpages <= tlb_single_page_flush_ceiling) - on_each_cpu(__cpa_flush_tlb, cpa, 1); - else + if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) flush_tlb_all(); + else + on_each_cpu(__cpa_flush_tlb, cpa, 1); if (!cache) return; @@ -1584,6 +1585,8 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + cpa->force_flush_all = 1; + ret = __change_page_attr_set_clr(&alias_cpa, 0); if (ret) return ret; @@ -1604,6 +1607,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + cpa->force_flush_all = 1; /* * The high mapping range is imprecise, so ignore the * return value. @@ -2325,7 +2329,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; @@ -2334,12 +2338,6 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1f67b1e15bf6..eed797e8735a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -662,8 +662,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, - pgprot_t flags) +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ pgprot_val(flags) &= __default_kernel_pte_mask; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index eaaed5bfc4a4..bd318043b259 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -138,6 +138,19 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_AX)); } +/* + * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 + * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte + * of encoding. al,cl,dl,bl have simpler encoding. + */ +static bool is_ereg_8l(u32 reg) +{ + return is_ereg(reg) || + (1 << reg) & (BIT(BPF_REG_1) | + BIT(BPF_REG_2) | + BIT(BPF_REG_FP)); +} + static bool is_axreg(u32 reg) { return reg == BPF_REG_0; @@ -747,9 +760,8 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: /* Emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* We have to add extra byte for x86 SIL, DIL regs */ - src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) + /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index b29e82f190c7..4c9f10b73eb6 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2056,7 +2056,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, STACK_VAR(dst_hi)); EMIT(0x0, 4); } else { - EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0); + /* xor dst_hi,dst_hi */ + EMIT2(0x33, + add_2reg(0xC0, dst_hi, dst_hi)); } break; case BPF_DW: @@ -2157,6 +2159,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst_hi)); + } else { + /* mov dreg_lo,dst_lo */ + EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo)); + if (is_jmp64) + /* mov dreg_hi,dst_hi */ + EMIT2(0x89, + add_2reg(0xC0, dreg_hi, dst_hi)); } if (sstk) { @@ -2215,8 +2224,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; - u8 dreg_lo = dstk ? IA32_EAX : dst_lo; - u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 dreg_lo = IA32_EAX; + u8 dreg_hi = IA32_EDX; u8 sreg_lo = sstk ? IA32_ECX : src_lo; u8 sreg_hi = sstk ? IA32_EBX : src_hi; @@ -2241,17 +2250,19 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } /* and dreg_lo,sreg_lo */ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); - /* and dreg_hi,sreg_hi */ - EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); - /* or dreg_lo,dreg_hi */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + if (is_jmp64) { + /* and dreg_hi,sreg_hi */ + EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); + /* or dreg_lo,dreg_hi */ + EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + } goto emit_cond_jmp; } case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; - u8 dreg_lo = dstk ? IA32_EAX : dst_lo; - u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 dreg_lo = IA32_EAX; + u8 dreg_hi = IA32_EDX; u8 sreg_lo = IA32_ECX; u8 sreg_hi = IA32_EBX; u32 hi; @@ -2264,6 +2275,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst_hi)); + } else { + /* mov dreg_lo,dst_lo */ + EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo)); + if (is_jmp64) + /* mov dreg_hi,dst_hi */ + EMIT2(0x89, + add_2reg(0xC0, dreg_hi, dst_hi)); } /* mov ecx,imm32 */ diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 527e69b12002..0c67a5a94de3 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -572,6 +572,10 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa1ec, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa1ed, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa26c, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa26d, pci_invalid_bar); /* * Device [1022:7808] @@ -589,6 +593,17 @@ static void pci_fixup_amd_ehci_pme(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme); /* + * Device [1022:7914] + * When in D0, PME# doesn't get asserted when plugging USB 2.0 device. + */ +static void pci_fixup_amd_fch_xhci_pme(struct pci_dev *dev) +{ + dev_info(&dev->dev, "PME# does not work under D0, disabling it\n"); + dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7914, pci_fixup_amd_fch_xhci_pme); + +/* * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff] * * Using the [mem 0x7fa00000-0x7fbfffff] region, e.g., by assigning it to diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index a7189a3b4d70..9b1b133c860a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -481,7 +481,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -506,14 +505,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -894,9 +895,6 @@ static void __init kexec_enter_virtual_mode(void) if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); - - /* clean DUMMY object */ - efi_delete_dummy_variable(); #endif } @@ -933,16 +931,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -956,8 +952,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -965,12 +960,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -990,9 +984,9 @@ static void __init __efi_enter_virtual_mode(void) } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } efi_free_boot_services(); @@ -1021,6 +1015,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 08ce8177c3af..e39c930cfbd1 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -316,7 +316,7 @@ void efi_sync_low_kernel_mappings(void) static inline phys_addr_t virt_to_phys_or_null_size(void *va, unsigned long size) { - bool bad_size; + phys_addr_t pa; if (!va) return 0; @@ -324,16 +324,13 @@ virt_to_phys_or_null_size(void *va, unsigned long size) if (virt_addr_valid(va)) return virt_to_phys(va); - /* - * A fully aligned variable on the stack is guaranteed not to - * cross a page bounary. Try to catch strings on the stack by - * checking that 'size' is a power of two. - */ - bad_size = size > PAGE_SIZE || !is_power_of_2(size); + pa = slow_virt_to_phys(va); - WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + /* check if the object crosses a page boundary */ + if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK)) + return 0; - return slow_virt_to_phys(va); + return pa; } #define virt_to_phys_or_null(addr) \ @@ -392,11 +389,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); @@ -790,6 +788,8 @@ static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, unsigned long *data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); efi_status_t status; u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; @@ -797,14 +797,19 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_data_size = virt_to_phys_or_null(data_size); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); phys_attr = virt_to_phys_or_null(attr); phys_data = virt_to_phys_or_null_size(data, *data_size); - status = efi_thunk(get_variable, phys_name, phys_vendor, - phys_attr, phys_data_size, phys_data); + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -815,19 +820,25 @@ static efi_status_t efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); u32 phys_name, phys_vendor, phys_data; efi_status_t status; unsigned long flags; spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - /* If data_size is > sizeof(u32) we've got problems */ - status = efi_thunk(set_variable, phys_name, phys_vendor, - attr, data_size, phys_data); + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -839,6 +850,8 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); u32 phys_name, phys_vendor, phys_data; efi_status_t status; unsigned long flags; @@ -846,13 +859,17 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) return EFI_NOT_READY; + *vnd = *vendor; + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - /* If data_size is > sizeof(u32) we've got problems */ - status = efi_thunk(set_variable, phys_name, phys_vendor, - attr, data_size, phys_data); + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -864,21 +881,29 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; unsigned long flags; spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_name_size = virt_to_phys_or_null(name_size); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_name = virt_to_phys_or_null_size(name, *name_size); - status = efi_thunk(get_next_variable, phys_name_size, - phys_name, phys_vendor); + if (!phys_name) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); spin_unlock_irqrestore(&efi_runtime_lock, flags); + *vendor = *vnd; return status; } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 3b9fd679cea9..aefe845dff59 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -260,10 +260,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - /* No need to reserve regions that will never be freed. */ - if (md.attribute & EFI_MEMORY_RUNTIME) - return; - size += addr % EFI_PAGE_SIZE; size = round_up(size, EFI_PAGE_SIZE); addr = round_down(addr, EFI_PAGE_SIZE); @@ -293,6 +289,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) early_memunmap(new, new_size); efi_memmap_install(new_phys, num_entries); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); } /* diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index c78bfc16a3ca..26d1f6693789 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -216,7 +216,7 @@ static u32 __init olpc_dt_get_board_revision(void) return be32_to_cpu(rev); } -int olpc_dt_compatible_match(phandle node, const char *compat) +static int __init olpc_dt_compatible_match(phandle node, const char *compat) { char buf[64], *p; int plen, len; diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index b81b5172cf99..2cfa0caef133 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -15,7 +15,10 @@ $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +# Sanitizer, etc. runtimes are unavailable and cannot be linked here. +GCOV_PROFILE := n KASAN_SANITIZE := n +UBSAN_SANITIZE := n KCOV_INSTRUMENT := n # These are adjustments to the compiler flags used for objects that @@ -23,7 +26,7 @@ KCOV_INSTRUMENT := n PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) +PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 9166a071484c..1c2832e9f77d 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 750f46ad018a..205b1176084f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -269,19 +269,41 @@ void xen_reboot(int reason) BUG(); } +static int reboot_reason = SHUTDOWN_reboot; +static bool xen_legacy_crash; void xen_emergency_restart(void) { - xen_reboot(SHUTDOWN_reboot); + xen_reboot(reboot_reason); } static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!kexec_crash_loaded()) - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) { + if (xen_legacy_crash) + xen_reboot(SHUTDOWN_crash); + + reboot_reason = SHUTDOWN_crash; + + /* + * If panic_timeout==0 then we are supposed to wait forever. + * However, to preserve original dom0 behavior we have to drop + * into hypervisor. (domU behavior is controlled by its + * config file) + */ + if (panic_timeout == 0) + panic_timeout = -1; + } return NOTIFY_DONE; } +static int __init parse_xen_legacy_crash(char *arg) +{ + xen_legacy_crash = true; + return 0; +} +early_param("xen_legacy_crash", parse_xen_legacy_crash); + static struct notifier_block xen_panic_block = { .notifier_call = xen_panic_event, .priority = INT_MIN diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 30c14cb343fc..c9b51e1a0c53 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -907,14 +907,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; @@ -998,7 +999,8 @@ void __init xen_setup_vcpu_info_placement(void) __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); - pv_ops.mmu.read_cr2 = xen_read_cr2_direct; + pv_ops.mmu.read_cr2 = + __PV_IS_CALLEE_SAVE(xen_read_cr2_direct); } } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index beb44e22afdf..bda8700cd8c6 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2) this_cpu_read(xen_vcpu)->arch.cr2 = cr2; } -static unsigned long xen_read_cr2(void) -{ - return this_cpu_read(xen_vcpu)->arch.cr2; -} - -unsigned long xen_read_cr2_direct(void) -{ - return this_cpu_read(xen_vcpu_info.arch.cr2); -} - static noinline void xen_flush_tlb(void) { struct mmuext_op *op; @@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void) } static const struct pv_mmu_ops xen_mmu_ops __initconst = { - .read_cr2 = xen_read_cr2, + .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2), .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 590fcf863006..0d2f07d23f72 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -58,6 +58,7 @@ static void cpu_bringup(void) { int cpu; + cr4_init(); cpu_init(); touch_softlockup_watchdog(); preempt_disable(); @@ -91,6 +92,7 @@ asmlinkage __visible void cpu_bringup_and_idle(void) cpu_bringup(); boot_init_stack_canary(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + prevent_tail_call_optimization(); } void xen_smp_intr_free_pv(unsigned int cpu) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 8019edd0125c..be104eef80be 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -10,6 +10,7 @@ #include <asm/percpu.h> #include <asm/processor-flags.h> #include <asm/frame.h> +#include <asm/asm.h> #include <linux/linkage.h> @@ -135,3 +136,18 @@ ENTRY(check_events) FRAME_END ret ENDPROC(check_events) + +ENTRY(xen_read_cr2) + FRAME_BEGIN + _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX + _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX + FRAME_END + ret + ENDPROC(xen_read_cr2); + +ENTRY(xen_read_cr2_direct) + FRAME_BEGIN + _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX + FRAME_END + ret + ENDPROC(xen_read_cr2_direct); diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index c15db060a242..cd177772fe4d 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -126,10 +126,9 @@ hyper_iret: .globl xen_iret_start_crit, xen_iret_end_crit /* - * This is called by xen_hypervisor_callback in entry.S when it sees + * This is called by xen_hypervisor_callback in entry_32.S when it sees * that the EIP at the time of interrupt was between - * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in - * %eax so we can do a more refined determination of what to do. + * xen_iret_start_crit and xen_iret_end_crit. * * The stack format at this point is: * ---------------- @@ -138,70 +137,46 @@ hyper_iret: * eflags } outer exception info * cs } * eip } - * ---------------- <- edi (copy dest) - * eax : outer eax if it hasn't been restored * ---------------- - * eflags } nested exception info - * cs } (no ss/esp because we're nested - * eip } from the same ring) - * orig_eax }<- esi (copy src) - * - - - - - - - - - * fs } - * es } - * ds } SAVE_ALL state - * eax } - * : : - * ebx }<- esp + * eax : outer eax if it hasn't been restored * ---------------- + * eflags } + * cs } nested exception info + * eip } + * return address : (into xen_hypervisor_callback) * - * In order to deliver the nested exception properly, we need to shift - * everything from the return addr up to the error code so it sits - * just under the outer exception info. This means that when we - * handle the exception, we do it in the context of the outer - * exception rather than starting a new one. + * In order to deliver the nested exception properly, we need to discard the + * nested exception frame such that when we handle the exception, we do it + * in the context of the outer exception rather than starting a new one. * - * The only caveat is that if the outer eax hasn't been restored yet - * (ie, it's still on stack), we need to insert its value into the - * SAVE_ALL state before going on, since it's usermode state which we - * eventually need to restore. + * The only caveat is that if the outer eax hasn't been restored yet (i.e. + * it's still on stack), we need to restore its value here. */ ENTRY(xen_iret_crit_fixup) /* * Paranoia: Make sure we're really coming from kernel space. * One could imagine a case where userspace jumps into the * critical range address, but just before the CPU delivers a - * GP, it decides to deliver an interrupt instead. Unlikely? - * Definitely. Easy to avoid? Yes. The Intel documents - * explicitly say that the reported EIP for a bad jump is the - * jump instruction itself, not the destination, but some - * virtual environments get this wrong. + * PF, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. */ - movl PT_CS(%esp), %ecx - andl $SEGMENT_RPL_MASK, %ecx - cmpl $USER_RPL, %ecx - je 2f - - lea PT_ORIG_EAX(%esp), %esi - lea PT_EFLAGS(%esp), %edi + testb $2, 2*4(%esp) /* nested CS */ + jnz 2f /* * If eip is before iret_restore_end then stack * hasn't been restored yet. */ - cmp $iret_restore_end, %eax + cmpl $iret_restore_end, 1*4(%esp) jae 1f - movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ - movl %eax, PT_EAX(%esp) + movl 4*4(%esp), %eax /* load outer EAX */ + ret $4*4 /* discard nested EIP, CS, and EFLAGS as + * well as the just restored EAX */ - lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ - - /* set up the copy */ -1: std - mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ - rep movsl - cld - - lea 4(%edi), %esp /* point esp to new frame */ -2: jmp xen_do_upcall +1: + ret $3*4 /* discard nested EIP, CS, and EFLAGS */ +2: + ret +END(xen_iret_crit_fixup) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f111f47ba98..45a441c33d6d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void); __visible unsigned long xen_save_fl_direct(void); __visible void xen_restore_fl_direct(unsigned long); +__visible unsigned long xen_read_cr2(void); +__visible unsigned long xen_read_cr2_direct(void); + /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); __visible void xen_sysret32(void); diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 359ab40e935a..c90fb944f9d8 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -51,7 +51,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->areg[0] = (long) error ? error : val; + regs->areg[2] = (long) error ? error : val; } #define SYSCALL_MAX_ARGS 6 diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index f092cc3f4e66..956d4d47c6cd 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -55,6 +55,10 @@ struct thread_info { mm_segment_t addr_limit; /* thread address space */ unsigned long cpenable; +#if XCHAL_HAVE_EXCLUSIVE + /* result of the most recent exclusive store */ + unsigned long atomctl8; +#endif /* Allocate storage for extra user states and coprocessor states. */ #if XTENSA_HAVE_COPROCESSORS diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 33a257b33723..dc5c83cad9be 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -93,6 +93,9 @@ int main(void) DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra)); DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); +#if XCHAL_HAVE_EXCLUSIVE + DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8)); +#endif #if XTENSA_HAVE_COPROCESSORS DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e54af8b7e0f8..f9c7a02acc29 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -374,6 +374,11 @@ common_exception: s32i a2, a1, PT_LCOUNT #endif +#if XCHAL_HAVE_EXCLUSIVE + /* Clear exclusive access monitor set by interrupted code */ + clrex +#endif + /* It is now save to restore the EXC_TABLE_FIXUP variable. */ rsr a2, exccause @@ -1989,6 +1994,12 @@ ENTRY(_switch_to) s32i a3, a4, THREAD_CPENABLE #endif +#if XCHAL_HAVE_EXCLUSIVE + l32i a3, a5, THREAD_ATOMCTL8 + getex a3 + s32i a3, a4, THREAD_ATOMCTL8 +#endif + /* Flush register file. */ spill_registers_kernel diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index 9bae79f70301..86c9ba963155 100644 --- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -401,7 +401,7 @@ static struct pmu xtensa_pmu = { .read = xtensa_pmu_read, }; -static int xtensa_pmu_setup(int cpu) +static int xtensa_pmu_setup(unsigned int cpu) { unsigned i; diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 0634bfb82a0b..5ea7f39e5d62 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -723,7 +723,8 @@ c_start(struct seq_file *f, loff_t *pos) static void * c_next(struct seq_file *f, void *v, loff_t *pos) { - return NULL; + ++*pos; + return c_start(f, pos); } static void diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 4092555828b1..24cf6972eace 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -87,13 +87,13 @@ void __xtensa_libgcc_window_spill(void) } EXPORT_SYMBOL(__xtensa_libgcc_window_spill); -unsigned long __sync_fetch_and_and_4(unsigned long *p, unsigned long v) +unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { BUG(); } EXPORT_SYMBOL(__sync_fetch_and_and_4); -unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) +unsigned int __sync_fetch_and_or_4(volatile void *p, unsigned int v) { BUG(); } diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index af7152560bc3..b771459778fe 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -56,7 +56,9 @@ static void __init populate(void *start, void *end) for (k = 0; k < PTRS_PER_PTE; ++k, ++j) { phys_addr_t phys = - memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, + 0, + MEMBLOCK_ALLOC_ANYWHERE); if (!phys) panic("Failed to allocate page table page\n"); diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 59153d0aa890..b43f03620843 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -216,6 +216,8 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) unsigned tlbidx = w | (e << PAGE_SHIFT); unsigned r0 = dtlb ? read_dtlb_virtual(tlbidx) : read_itlb_virtual(tlbidx); + unsigned r1 = dtlb ? + read_dtlb_translation(tlbidx) : read_itlb_translation(tlbidx); unsigned vpn = (r0 & PAGE_MASK) | (e << PAGE_SHIFT); unsigned pte = get_pte_for_vaddr(vpn); unsigned mm_asid = (get_rasid_register() >> 8) & ASID_MASK; @@ -231,8 +233,6 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) } if (tlb_asid == mm_asid) { - unsigned r1 = dtlb ? read_dtlb_translation(tlbidx) : - read_itlb_translation(tlbidx); if ((pte ^ r1) & PAGE_MASK) { pr_err("%cTLB: way: %u, entry: %u, mapping: %08x->%08x, PTE: %08x\n", dtlb ? 'D' : 'I', w, e, r0, r1, pte); |