diff options
Diffstat (limited to 'arch')
148 files changed, 1426 insertions, 652 deletions
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 0974226fab55..aa000075a575 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -65,6 +65,7 @@ CONFIG_DRM_UDL=y CONFIG_DRM_ETNAVIV=y CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index aa41af6ef4ac..efdedf83b954 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -11,6 +11,7 @@ #include <linux/clocksource.h> #include <linux/console.h> #include <linux/module.h> +#include <linux/sizes.h> #include <linux/cpu.h> #include <linux/of_clk.h> #include <linux/of_fdt.h> @@ -409,12 +410,12 @@ static void arc_chk_core_config(void) if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr) panic("Linux built with incorrect DCCM Base address\n"); - if (CONFIG_ARC_DCCM_SZ != cpu->dccm.sz) + if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz) panic("Linux built with incorrect DCCM Size\n"); #endif #ifdef CONFIG_ARC_HAS_ICCM - if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz) + if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz) panic("Linux built with incorrect ICCM Size\n"); #endif diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index a931d0a256d0..a645bca5899a 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -6,6 +6,7 @@ menuconfig ARC_PLAT_EZNPS bool "\"EZchip\" ARC dev platform" + depends on ISA_ARCOMPACT select CPU_BIG_ENDIAN select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 97864aabc2a6..579f7eb6968a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -13,6 +13,7 @@ config ARM select ARCH_HAS_KEEPINITRD select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL if ARM_LPAE select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index fc7ed03d8b93..51b078604978 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -43,7 +43,7 @@ SECTIONS } .table : ALIGN(4) { _table_start = .; - LONG(ZIMAGE_MAGIC(2)) + LONG(ZIMAGE_MAGIC(4)) LONG(ZIMAGE_MAGIC(0x5a534c4b)) LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start)) LONG(ZIMAGE_MAGIC(_kernel_bss_size)) diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index ba7f3e646c26..1333a68b9373 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -125,8 +125,6 @@ bus-width = <8>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sdmmc0_default>; - non-removable; - mmc-ddr-1_8v; status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm-hr2.dtsi b/arch/arm/boot/dts/bcm-hr2.dtsi index 6142c672811e..5e5f5ca3c86f 100644 --- a/arch/arm/boot/dts/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/bcm-hr2.dtsi @@ -75,7 +75,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&periph_clk>; }; @@ -83,7 +83,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(1) | - IRQ_TYPE_LEVEL_HIGH)>; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; @@ -91,7 +91,7 @@ compatible = "arm,cortex-a9-twd-wdt"; reg = <0x20620 0x20>; interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(1) | - IRQ_TYPE_LEVEL_HIGH)>; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 4c3f606e5b8d..f65448c01e31 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -24,7 +24,7 @@ leds { act { - gpios = <&gpio 47 GPIO_ACTIVE_HIGH>; + gpios = <&gpio 47 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 5f5ee16f07a3..a341511f014c 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -172,6 +172,7 @@ #address-cells = <1>; ranges = <0x51000000 0x51000000 0x3000 0x0 0x20000000 0x10000000>; + dma-ranges; /** * To enable PCI endpoint mode, disable the pcie1_rc * node and enable pcie1_ep mode. @@ -185,7 +186,6 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; - dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; @@ -230,6 +230,7 @@ #address-cells = <1>; ranges = <0x51800000 0x51800000 0x3000 0x0 0x30000000 0x10000000>; + dma-ranges; status = "disabled"; pcie2_rc: pcie@51800000 { reg = <0x51800000 0x2000>, <0x51802000 0x14c>, <0x1000 0x2000>; @@ -240,7 +241,6 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; - dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts index 0cd75dadf292..188639738dc3 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts @@ -75,8 +75,8 @@ imx27-phycard-s-rdk { pinctrl_i2c1: i2c1grp { fsl,pins = < - MX27_PAD_I2C2_SDA__I2C2_SDA 0x0 - MX27_PAD_I2C2_SCL__I2C2_SCL 0x0 + MX27_PAD_I2C_DATA__I2C_DATA 0x0 + MX27_PAD_I2C_CLK__I2C_CLK 0x0 >; }; diff --git a/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts b/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts index 0d594e4bd559..a1173bf5bff5 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts +++ b/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts @@ -38,7 +38,7 @@ }; &switch_ports { - /delete-node/ port@2; + /delete-node/ port@3; }; &touchscreen { diff --git a/arch/arm/boot/dts/imx6q-b450v3.dts b/arch/arm/boot/dts/imx6q-b450v3.dts index 95b8f2d71821..fb0980190aa0 100644 --- a/arch/arm/boot/dts/imx6q-b450v3.dts +++ b/arch/arm/boot/dts/imx6q-b450v3.dts @@ -65,13 +65,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>, - <&clks IMX6QDL_CLK_PLL3_USB_OTG>; -}; - &ldb { status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-b650v3.dts b/arch/arm/boot/dts/imx6q-b650v3.dts index 611cb7ae7e55..8f762d9c5ae9 100644 --- a/arch/arm/boot/dts/imx6q-b650v3.dts +++ b/arch/arm/boot/dts/imx6q-b650v3.dts @@ -65,13 +65,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>, - <&clks IMX6QDL_CLK_PLL3_USB_OTG>; -}; - &ldb { status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts index e4cb118f88c6..1ea64ecf4291 100644 --- a/arch/arm/boot/dts/imx6q-b850v3.dts +++ b/arch/arm/boot/dts/imx6q-b850v3.dts @@ -53,17 +53,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>, - <&clks IMX6QDL_CLK_IPU1_DI0_PRE_SEL>, - <&clks IMX6QDL_CLK_IPU2_DI0_PRE_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL2_PFD2_396M>, - <&clks IMX6QDL_CLK_PLL2_PFD2_396M>; -}; - &ldb { fsl,dual-channel; status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi index fa27dcdf06f1..1938b04199c4 100644 --- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi +++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi @@ -377,3 +377,18 @@ #interrupt-cells = <1>; }; }; + +&clks { + assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, + <&clks IMX6QDL_CLK_IPU1_DI0_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU1_DI1_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI0_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI1_PRE_SEL>; + assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>; +}; diff --git a/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi index 44a97ba93a95..352ac585ca6b 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi @@ -153,6 +153,7 @@ bus-width = <4>; keep-power-in-suspend; mmc-pwrseq = <&pwrseq_ti_wifi>; + cap-power-off-card; non-removable; vmmc-supply = <&vcc_3v3>; /* vqmmc-supply = <&nvcc_sd1>; - MMC layer doesn't like it! */ diff --git a/arch/arm/boot/dts/mmp3-dell-ariel.dts b/arch/arm/boot/dts/mmp3-dell-ariel.dts index 15449c72c042..b0ec14c42164 100644 --- a/arch/arm/boot/dts/mmp3-dell-ariel.dts +++ b/arch/arm/boot/dts/mmp3-dell-ariel.dts @@ -98,19 +98,19 @@ status = "okay"; }; -&ssp3 { +&ssp1 { status = "okay"; - cs-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpio 46 GPIO_ACTIVE_LOW>; firmware-flash@0 { - compatible = "st,m25p80", "jedec,spi-nor"; + compatible = "winbond,w25q32", "jedec,spi-nor"; reg = <0>; - spi-max-frequency = <40000000>; + spi-max-frequency = <104000000>; m25p,fast-read; }; }; -&ssp4 { - cs-gpios = <&gpio 56 GPIO_ACTIVE_HIGH>; +&ssp2 { + cs-gpios = <&gpio 56 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi index 59a108e49b41..1e25bf998ab5 100644 --- a/arch/arm/boot/dts/mmp3.dtsi +++ b/arch/arm/boot/dts/mmp3.dtsi @@ -202,8 +202,7 @@ }; hsic_phy0: hsic-phy@f0001800 { - compatible = "marvell,mmp3-hsic-phy", - "usb-nop-xceiv"; + compatible = "marvell,mmp3-hsic-phy"; reg = <0xf0001800 0x40>; #phy-cells = <0>; status = "disabled"; @@ -224,8 +223,7 @@ }; hsic_phy1: hsic-phy@f0002800 { - compatible = "marvell,mmp3-hsic-phy", - "usb-nop-xceiv"; + compatible = "marvell,mmp3-hsic-phy"; reg = <0xf0002800 0x40>; #phy-cells = <0>; status = "disabled"; @@ -531,7 +529,7 @@ }; soc_clocks: clocks@d4050000 { - compatible = "marvell,mmp2-clock"; + compatible = "marvell,mmp3-clock"; reg = <0xd4050000 0x1000>, <0xd4282800 0x400>, <0xd4015000 0x1000>; diff --git a/arch/arm/boot/dts/motorola-mapphone-common.dtsi b/arch/arm/boot/dts/motorola-mapphone-common.dtsi index 9067e0ef4240..06fbffa81636 100644 --- a/arch/arm/boot/dts/motorola-mapphone-common.dtsi +++ b/arch/arm/boot/dts/motorola-mapphone-common.dtsi @@ -367,6 +367,8 @@ }; &mmc3 { + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins>; vmmc-supply = <&wl12xx_vmmc>; /* uart2_tx.sdmmc3_dat1 pad as wakeirq */ interrupts-extended = <&wakeupgen GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH @@ -472,6 +474,37 @@ >; }; + /* + * Android uses PIN_OFF_INPUT_PULLDOWN | PIN_INPUT_PULLUP | MUX_MODE3 + * for gpio_100, but the internal pull makes wlan flakey on some + * devices. Off mode value should be tested if we have off mode working + * later on. + */ + mmc3_pins: pinmux_mmc3_pins { + pinctrl-single,pins = < + /* 0x4a10008e gpmc_wait2.gpio_100 d23 */ + OMAP4_IOPAD(0x08e, PIN_INPUT | MUX_MODE3) + + /* 0x4a100102 abe_mcbsp1_dx.sdmmc3_dat2 ab25 */ + OMAP4_IOPAD(0x102, PIN_INPUT_PULLUP | MUX_MODE1) + + /* 0x4a100104 abe_mcbsp1_fsx.sdmmc3_dat3 ac27 */ + OMAP4_IOPAD(0x104, PIN_INPUT_PULLUP | MUX_MODE1) + + /* 0x4a100118 uart2_cts.sdmmc3_clk ab26 */ + OMAP4_IOPAD(0x118, PIN_INPUT | MUX_MODE1) + + /* 0x4a10011a uart2_rts.sdmmc3_cmd ab27 */ + OMAP4_IOPAD(0x11a, PIN_INPUT_PULLUP | MUX_MODE1) + + /* 0x4a10011c uart2_rx.sdmmc3_dat0 aa25 */ + OMAP4_IOPAD(0x11c, PIN_INPUT_PULLUP | MUX_MODE1) + + /* 0x4a10011e uart2_tx.sdmmc3_dat1 aa26 */ + OMAP4_IOPAD(0x11e, PIN_INPUT_PULLUP | MUX_MODE1) + >; + }; + /* gpmc_ncs0.gpio_50 */ poweroff_gpio: pinmux_poweroff_pins { pinctrl-single,pins = < @@ -690,14 +723,18 @@ }; /* - * As uart1 is wired to mdm6600 with rts and cts, we can use the cts pin for - * uart1 wakeirq. + * The uart1 port is wired to mdm6600 with rts and cts. The modem uses gpio_149 + * for wake-up events for both the USB PHY and the UART. We can use gpio_149 + * pad as the shared wakeirq for the UART rather than the RX or CTS pad as we + * have gpio_149 trigger before the UART transfer starts. */ &uart1 { pinctrl-names = "default"; pinctrl-0 = <&uart1_pins>; interrupts-extended = <&wakeupgen GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH - &omap4_pmx_core 0xfc>; + &omap4_pmx_core 0x110>; + uart-has-rtscts; + current-speed = <115200>; }; &uart3 { diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index a5cd31229fbd..a3ba722a9d7f 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -131,7 +131,14 @@ cmt1: timer@e6130000 { compatible = "renesas,r8a73a4-cmt1", "renesas,rcar-gen2-cmt1"; reg = <0 0xe6130000 0 0x1004>; - interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>; clocks = <&mstp3_clks R8A73A4_CLK_CMT1>; clock-names = "fck"; power-domains = <&pd_c5>; diff --git a/arch/arm/boot/dts/r8a7740.dtsi b/arch/arm/boot/dts/r8a7740.dtsi index ebc1ff64f530..90feb2cf9960 100644 --- a/arch/arm/boot/dts/r8a7740.dtsi +++ b/arch/arm/boot/dts/r8a7740.dtsi @@ -479,7 +479,7 @@ cpg_clocks: cpg_clocks@e6150000 { compatible = "renesas,r8a7740-cpg-clocks"; reg = <0xe6150000 0x10000>; - clocks = <&extal1_clk>, <&extalr_clk>; + clocks = <&extal1_clk>, <&extal2_clk>, <&extalr_clk>; #clock-cells = <1>; clock-output-names = "system", "pllc0", "pllc1", "pllc2", "r", diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index cf36e25195b4..8c4b8f56c9e0 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -128,7 +128,7 @@ assigned-clocks = <&cru SCLK_GPU>; assigned-clock-rates = <100000000>; clocks = <&cru SCLK_GPU>, <&cru SCLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; resets = <&cru SRST_GPU>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts index 5670b33fd1bd..aed879db6c15 100644 --- a/arch/arm/boot/dts/rk3228-evb.dts +++ b/arch/arm/boot/dts/rk3228-evb.dts @@ -46,7 +46,7 @@ #address-cells = <1>; #size-cells = <0>; - phy: phy@0 { + phy: ethernet-phy@0 { compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22"; reg = <0>; clocks = <&cru SCLK_MAC_PHY>; diff --git a/arch/arm/boot/dts/rk3229-xms6.dts b/arch/arm/boot/dts/rk3229-xms6.dts index 679fc2b00e5a..933ef69da32a 100644 --- a/arch/arm/boot/dts/rk3229-xms6.dts +++ b/arch/arm/boot/dts/rk3229-xms6.dts @@ -150,7 +150,7 @@ #address-cells = <1>; #size-cells = <0>; - phy: phy@0 { + phy: ethernet-phy@0 { compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22"; reg = <0>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 4e90efdc9630..a83f65486ad4 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -561,7 +561,7 @@ "pp1", "ppmmu1"; clocks = <&cru ACLK_GPU>, <&cru ACLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; resets = <&cru SRST_GPU_A>; status = "disabled"; }; @@ -1033,7 +1033,7 @@ }; }; - spi-0 { + spi0 { spi0_clk: spi0-clk { rockchip,pins = <0 RK_PB1 2 &pcfg_pull_up>; }; @@ -1051,7 +1051,7 @@ }; }; - spi-1 { + spi1 { spi1_clk: spi1-clk { rockchip,pins = <0 RK_PC7 2 &pcfg_pull_up>; }; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 241f43e29c77..bb5ff10b9110 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -84,7 +84,7 @@ compatible = "arm,mali-400"; reg = <0x10090000 0x10000>; clocks = <&cru ACLK_GPU>, <&cru ACLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; assigned-clocks = <&cru ACLK_GPU>; assigned-clock-rates = <100000000>; resets = <&cru SRST_GPU>; diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 6fdb0ac62b3d..59da6c0b63b6 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, return; } - kernel_neon_begin(); - chacha_doneon(state, dst, src, bytes, nrounds); - kernel_neon_end(); + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); } EXPORT_SYMBOL(chacha_crypt_arch); diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c index ae5aefc44a4d..ffa8d73fe722 100644 --- a/arch/arm/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c index ceec04ec2f40..13cfef4ae22e 100644 --- a/arch/arm/crypto/poly1305-glue.c +++ b/arch/arm/crypto/poly1305-glue.c @@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); if (static_branch_likely(&have_neon) && do_neon) { - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, len, 1); - kernel_neon_end(); + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, todo, 1); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); } else { poly1305_blocks_arm(&dctx->h, src, len, 1); + src += len; } - src += len; nbytes %= POLY1305_BLOCK_SIZE; } diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 99929122dad7..3546d294d55f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -18,11 +18,11 @@ #endif #include <asm/ptrace.h> -#include <asm/domain.h> #include <asm/opcodes-virt.h> #include <asm/asm-offsets.h> #include <asm/page.h> #include <asm/thread_info.h> +#include <asm/uaccess-asm.h> #define IOMEM(x) (x) @@ -446,79 +446,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .size \name , . - \name .endm - .macro csdb -#ifdef CONFIG_THUMB2_KERNEL - .inst.w 0xf3af8014 -#else - .inst 0xe320f014 -#endif - .endm - - .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req -#ifndef CONFIG_CPU_USE_DOMAINS - adds \tmp, \addr, #\size - 1 - sbcscc \tmp, \tmp, \limit - bcs \bad -#ifdef CONFIG_CPU_SPECTRE - movcs \addr, #0 - csdb -#endif -#endif - .endm - - .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req -#ifdef CONFIG_CPU_SPECTRE - sub \tmp, \limit, #1 - subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr - addhs \tmp, \tmp, #1 @ if (tmp >= 0) { - subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } - movlo \addr, #0 @ if (tmp < 0) addr = NULL - csdb -#endif - .endm - - .macro uaccess_disable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Whenever we re-enter userspace, the domains should always be - * set appropriately. - */ - mov \tmp, #DACR_UACCESS_DISABLE - mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register - .if \isb - instr_sync - .endif -#endif - .endm - - .macro uaccess_enable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Whenever we re-enter userspace, the domains should always be - * set appropriately. - */ - mov \tmp, #DACR_UACCESS_ENABLE - mcr p15, 0, \tmp, c3, c0, 0 - .if \isb - instr_sync - .endif -#endif - .endm - - .macro uaccess_save, tmp -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - mrc p15, 0, \tmp, c3, c0, 0 - str \tmp, [sp, #SVC_DACR] -#endif - .endm - - .macro uaccess_restore -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - ldr r0, [sp, #SVC_DACR] - mcr p15, 0, r0, c3, c0, 0 -#endif - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 83c391b597d4..fdc4ae3e7378 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -164,8 +164,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) preempt_enable(); #endif - if (!ret) - *oval = oldval; + /* + * Store unconditionally. If ret != 0 the extra store is the least + * of the worries but GCC cannot figure out that __futex_atomic_op() + * is either setting ret to -EFAULT or storing the old value in + * oldval which results in a uninitialized warning at the call site. + */ + *oval = oldval; return ret; } diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 3944305e81df..b26c1aaf1e3c 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -367,6 +367,7 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, } } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {} +static inline bool vcpu_has_ptrauth(struct kvm_vcpu *vcpu) { return false; } +static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) { } #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index a827b4d60d38..03932e172730 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -453,4 +453,6 @@ static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) return true; } +#define kvm_arm_vcpu_loaded(vcpu) (false) + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h new file mode 100644 index 000000000000..907571fd05c6 --- /dev/null +++ b/arch/arm/include/asm/uaccess-asm.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_UACCESS_ASM_H__ +#define __ASM_UACCESS_ASM_H__ + +#include <asm/asm-offsets.h> +#include <asm/domain.h> +#include <asm/memory.h> +#include <asm/thread_info.h> + + .macro csdb +#ifdef CONFIG_THUMB2_KERNEL + .inst.w 0xf3af8014 +#else + .inst 0xe320f014 +#endif + .endm + + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req +#ifndef CONFIG_CPU_USE_DOMAINS + adds \tmp, \addr, #\size - 1 + sbcscc \tmp, \tmp, \limit + bcs \bad +#ifdef CONFIG_CPU_SPECTRE + movcs \addr, #0 + csdb +#endif +#endif + .endm + + .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req +#ifdef CONFIG_CPU_SPECTRE + sub \tmp, \limit, #1 + subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr + addhs \tmp, \tmp, #1 @ if (tmp >= 0) { + subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } + movlo \addr, #0 @ if (tmp < 0) addr = NULL + csdb +#endif + .endm + + .macro uaccess_disable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register + .if \isb + instr_sync + .endif +#endif + .endm + + .macro uaccess_enable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_ENABLE + mcr p15, 0, \tmp, c3, c0, 0 + .if \isb + instr_sync + .endif +#endif + .endm + +#if defined(CONFIG_CPU_SW_DOMAIN_PAN) || defined(CONFIG_CPU_USE_DOMAINS) +#define DACR(x...) x +#else +#define DACR(x...) +#endif + + /* + * Save the address limit on entry to a privileged exception. + * + * If we are using the DACR for kernel access by the user accessors + * (CONFIG_CPU_USE_DOMAINS=y), always reset the DACR kernel domain + * back to client mode, whether or not \disable is set. + * + * If we are using SW PAN, set the DACR user domain to no access + * if \disable is set. + */ + .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable + ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] + mov \tmp2, #TASK_SIZE + str \tmp2, [\tsk, #TI_ADDR_LIMIT] + DACR( mrc p15, 0, \tmp0, c3, c0, 0) + DACR( str \tmp0, [sp, #SVC_DACR]) + str \tmp1, [sp, #SVC_ADDR_LIMIT] + .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN) + /* kernel=client, user=no access */ + mov \tmp2, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp2, c3, c0, 0 + instr_sync + .elseif IS_ENABLED(CONFIG_CPU_USE_DOMAINS) + /* kernel=client */ + bic \tmp2, \tmp0, #domain_mask(DOMAIN_KERNEL) + orr \tmp2, \tmp2, #domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) + mcr p15, 0, \tmp2, c3, c0, 0 + instr_sync + .endif + .endm + + /* Restore the user access state previously saved by uaccess_entry */ + .macro uaccess_exit, tsk, tmp0, tmp1 + ldr \tmp1, [sp, #SVC_ADDR_LIMIT] + DACR( ldr \tmp0, [sp, #SVC_DACR]) + str \tmp1, [\tsk, #TI_ADDR_LIMIT] + DACR( mcr p15, 0, \tmp0, c3, c0, 0) + .endm + +#undef DACR + +#endif /* __ASM_UACCESS_ASM_H__ */ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 77f54830554c..55a47df04773 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -27,6 +27,7 @@ #include <asm/unistd.h> #include <asm/tls.h> #include <asm/system_info.h> +#include <asm/uaccess-asm.h> #include "entry-header.S" #include <asm/entry-macro-multi.S> @@ -179,15 +180,7 @@ ENDPROC(__und_invalid) stmia r7, {r2 - r6} get_thread_info tsk - ldr r0, [tsk, #TI_ADDR_LIMIT] - mov r1, #TASK_SIZE - str r1, [tsk, #TI_ADDR_LIMIT] - str r0, [sp, #SVC_ADDR_LIMIT] - - uaccess_save r0 - .if \uaccess - uaccess_disable r0 - .endif + uaccess_entry tsk, r0, r1, r2, \uaccess .if \trace #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 32051ec5b33f..40db0f9188b6 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -6,6 +6,7 @@ #include <asm/asm-offsets.h> #include <asm/errno.h> #include <asm/thread_info.h> +#include <asm/uaccess-asm.h> #include <asm/v7m.h> @ Bad Abort numbers @@ -217,9 +218,7 @@ blne trace_hardirqs_off #endif .endif - ldr r1, [sp, #SVC_ADDR_LIMIT] - uaccess_restore - str r1, [tsk, #TI_ADDR_LIMIT] + uaccess_exit tsk, r0, r1 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -263,9 +262,7 @@ @ on the stack remains correct). @ .macro svc_exit_via_fiq - ldr r1, [sp, #SVC_ADDR_LIMIT] - uaccess_restore - str r1, [tsk, #TI_ADDR_LIMIT] + uaccess_exit tsk, r0, r1 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index b606cded90cd..4cc6a7eff635 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -219,8 +219,8 @@ static struct undef_hook arm_break_hook = { }; static struct undef_hook thumb_break_hook = { - .instr_mask = 0xffff, - .instr_val = 0xde01, + .instr_mask = 0xffffffff, + .instr_val = 0x0000de01, .cpsr_mask = PSR_T_BIT, .cpsr_val = PSR_T_BIT, .fn = break_trap, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b30e884e088..84e1f0a43cdb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,6 +21,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index abe04f4ad7d8..eeaa95baaa10 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2204,7 +2204,7 @@ reg = <0x0 0xff400000 0x0 0x40000>; interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clkc CLKID_USB1_DDR_BRIDGE>; - clock-names = "ddr"; + clock-names = "otg"; phys = <&usb2_phy1>; phy-names = "usb2-phy"; dr_mode = "peripheral"; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi index 554863429aa6..e2094575f528 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi @@ -152,6 +152,10 @@ clock-latency = <50000>; }; +&frddr_a { + status = "okay"; +}; + &frddr_b { status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts index ccd0bced01e8..2e66d6418a59 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-ugoos-am6.dts @@ -545,7 +545,7 @@ &usb { status = "okay"; dr_mode = "host"; - vbus-regulator = <&usb_pwr_en>; + vbus-supply = <&usb_pwr_en>; }; &usb2_phy0 { diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index a44b5438e842..882e913436ca 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -661,7 +661,7 @@ reg = <0x30bd0000 0x10000>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>, - <&clk IMX8MN_CLK_SDMA1_ROOT>; + <&clk IMX8MN_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 8b4e806d5119..125c78321ab4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -1401,8 +1401,8 @@ "venc_lt_sel"; assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>, <&topckgen CLK_TOP_VENC_LT_SEL>; - assigned-clock-parents = <&topckgen CLK_TOP_VENCPLL_D2>, - <&topckgen CLK_TOP_UNIVPLL1_D2>; + assigned-clock-parents = <&topckgen CLK_TOP_VCODECPLL>, + <&topckgen CLK_TOP_VCODECPLL_370P5>; }; jpegdec: jpegdec@18004000 { diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index fff6115f2670..3c7c9b52623c 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -658,8 +658,8 @@ s11 { qcom,saw-leader; regulator-always-on; - regulator-min-microvolt = <1230000>; - regulator-max-microvolt = <1230000>; + regulator-min-microvolt = <980000>; + regulator-max-microvolt = <980000>; }; }; @@ -908,10 +908,27 @@ status = "okay"; }; +&q6asmdai { + dai@0 { + reg = <0>; + }; + + dai@1 { + reg = <1>; + }; + + dai@2 { + reg = <2>; + }; +}; + &sound { compatible = "qcom,apq8096-sndcard"; model = "DB820c"; - audio-routing = "RX_BIAS", "MCLK"; + audio-routing = "RX_BIAS", "MCLK", + "MM_DL1", "MultiMedia1 Playback", + "MM_DL2", "MultiMedia2 Playback", + "MultiMedia3 Capture", "MM_UL3"; mm1-dai-link { link-name = "MultiMedia1"; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 7ae082ea14ea..f925a6c7d293 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -2053,6 +2053,8 @@ reg = <APR_SVC_ASM>; q6asmdai: dais { compatible = "qcom,q6asm-dais"; + #address-cells = <1>; + #size-cells = <0>; #sound-dai-cells = <1>; iommus = <&lpass_q6_smmu 1>; }; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index b340fb469999..1692bc95129e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1318,6 +1318,7 @@ ipmmu_vip0: mmu@e7b00000 { compatible = "renesas,ipmmu-r8a77980"; reg = <0 0xe7b00000 0 0x1000>; + renesas,ipmmu-main = <&ipmmu_mm 4>; power-domains = <&sysc R8A77980_PD_ALWAYS_ON>; #iommu-cells = <1>; }; @@ -1325,6 +1326,7 @@ ipmmu_vip1: mmu@e7960000 { compatible = "renesas,ipmmu-r8a77980"; reg = <0 0xe7960000 0 0x1000>; + renesas,ipmmu-main = <&ipmmu_mm 11>; power-domains = <&sysc R8A77980_PD_ALWAYS_ON>; #iommu-cells = <1>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts index 49c4b96da3d4..05265b38cc02 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts @@ -86,13 +86,13 @@ assigned-clock-rate = <50000000>; assigned-clocks = <&cru SCLK_MAC2PHY>; assigned-clock-parents = <&cru SCLK_MAC2PHY_SRC>; - + status = "okay"; }; &i2c1 { status = "okay"; - rk805: rk805@18 { + rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 62936b432f9a..304fad1a0b57 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -169,7 +169,7 @@ &i2c1 { status = "okay"; - rk805: rk805@18 { + rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 33cc21fcf4c1..c341172ec208 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -410,7 +410,7 @@ reset-names = "usb3-otg"; status = "disabled"; - usbdrd_dwc3_0: dwc3 { + usbdrd_dwc3_0: usb@fe800000 { compatible = "snps,dwc3"; reg = <0x0 0xfe800000 0x0 0x100000>; interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH 0>; @@ -446,7 +446,7 @@ reset-names = "usb3-otg"; status = "disabled"; - usbdrd_dwc3_1: dwc3 { + usbdrd_dwc3_1: usb@fe900000 { compatible = "snps,dwc3"; reg = <0x0 0xfe900000 0x0 0x100000>; interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH 0>; @@ -1890,10 +1890,10 @@ gpu: gpu@ff9a0000 { compatible = "rockchip,rk3399-mali", "arm,mali-t860"; reg = <0x0 0xff9a0000 0x0 0x10000>; - interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH 0>; - interrupt-names = "gpu", "job", "mmu"; + interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH 0>, + <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH 0>, + <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH 0>; + interrupt-names = "job", "mmu", "gpu"; clocks = <&cru ACLK_GPU>; #cooling-cells = <2>; power-domains = <&power RK3399_PD_GPU>; diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index 37ca3e889848..af2bbca38e70 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, !crypto_simd_usable()) return chacha_crypt_generic(state, dst, src, bytes, nrounds); - kernel_neon_begin(); - chacha_doneon(state, dst, src, bytes, nrounds); - kernel_neon_end(); + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); } EXPORT_SYMBOL(chacha_crypt_arch); diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index 895d3727c1fb..c5405e6a6db7 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index e97b092f56b8..f33ada70c4ed 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); if (static_branch_likely(&have_neon) && crypto_simd_usable()) { - kernel_neon_begin(); - poly1305_blocks_neon(&dctx->h, src, len, 1); - kernel_neon_end(); + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, todo, 1); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); } else { poly1305_blocks(&dctx->h, src, len, 1); + src += len; } - src += len; nbytes %= POLY1305_BLOCK_SIZE; } diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index b263e239cb59..a45366c3909b 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> +#include <linux/stddef.h> #include <asm/cputype.h> #include <asm/io.h> @@ -31,14 +32,14 @@ * is therefore used to delimit the MADT GICC structure minimum length * appropriately. */ -#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ +#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \ struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) -#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ +#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) /* Basic configuration for ACPI */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index f658dda12364..0ab02e5ff712 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -111,12 +111,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) -{ - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 57fd46acd058..584d9792cbfe 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -404,8 +404,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); * CP14 and CP15 live in the same array, as they are backed by the * same system registers. */ -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) +#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; @@ -683,4 +685,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_arm_vcpu_loaded(vcpu) ((vcpu)->arch.sysregs_loaded_on_cpu) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 8e9c924423b4..a0b144cfaea7 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -177,6 +177,7 @@ void machine_kexec(struct kimage *kimage) * the offline CPUs. Therefore, we must use the __* variant here. */ __flush_icache_range((uintptr_t)reboot_code_buffer, + (uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index cd6e5fa48b9c..c30f77bd875f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1829,10 +1829,11 @@ static void tracehook_report_syscall(struct pt_regs *regs, int syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE) || - test_thread_flag(TIF_SYSCALL_EMU)) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + + if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); - if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU)) + if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU)) return -1; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index dd2514bb1511..3862cad2410c 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -32,7 +32,7 @@ UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y KCOV_INSTRUMENT := n -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2bd92301d32f..6194cb3309d0 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -201,6 +201,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + + if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { + int i; + + for (i = 0; i < 16; i++) + *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + } out: return err; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index aacfc55de44c..e0a4bcdb9451 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -162,31 +162,16 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - /* * Handle the guest trying to use a ptrauth instruction, or trying to access a * ptrauth register. */ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *ctxt; - - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_enable(vcpu); - ctxt = vcpu->arch.host_cpu_context; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - } else { + else kvm_inject_undefined(vcpu); - } } /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3e909b117f0c..c3d15eaa9ae6 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1280,10 +1280,16 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + int reg = r->reg; + + /* See the 32bit mapping in kvm_host.h */ + if (p->is_aarch32) + reg = r->reg / 2; + if (p->is_write) - vcpu_write_sys_reg(vcpu, p->regval, r->reg); + vcpu_write_sys_reg(vcpu, p->regval, reg); else - p->regval = vcpu_read_sys_reg(vcpu, r->reg); + p->regval = vcpu_read_sys_reg(vcpu, reg); return true; } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index bbeb6a5a6ba6..0be3355e3499 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = (pte_t *)pudp; } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); + if (!pmdp) + return NULL; WARN_ON(addr & (sz - 1)); /* diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index 5056ebb902d1..61d94ec7dd16 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -167,8 +167,8 @@ * BA Reserved C D V */ cprcr r6, cpcr30 - lsri r6, 28 - lsli r6, 28 + lsri r6, 29 + lsli r6, 29 addi r6, 0xe cpwcr r6, cpcr30 diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index 111973c6c713..ac8f65a3e75a 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -13,6 +13,8 @@ #define LSAVE_A1 28 #define LSAVE_A2 32 #define LSAVE_A3 36 +#define LSAVE_A4 40 +#define LSAVE_A5 44 #define KSPTOUSP #define USPTOKSP @@ -225,8 +227,8 @@ */ mfcr r6, cr<30, 15> /* Get MSA0 */ 2: - lsri r6, 28 - lsli r6, 28 + lsri r6, 29 + lsli r6, 29 addi r6, 0x1ce mtcr r6, cr<30, 15> /* Set MSA0 */ diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h index eaa1c3403a42..60f8a4112588 100644 --- a/arch/csky/include/asm/uaccess.h +++ b/arch/csky/include/asm/uaccess.h @@ -254,7 +254,7 @@ do { \ extern int __get_user_bad(void); -#define __copy_user(to, from, n) \ +#define ___copy_to_user(to, from, n) \ do { \ int w0, w1, w2, w3; \ asm volatile( \ @@ -289,31 +289,34 @@ do { \ " subi %0, 4 \n" \ " br 3b \n" \ "5: cmpnei %0, 0 \n" /* 1B */ \ - " bf 8f \n" \ + " bf 13f \n" \ " ldb %3, (%2, 0) \n" \ "6: stb %3, (%1, 0) \n" \ " addi %2, 1 \n" \ " addi %1, 1 \n" \ " subi %0, 1 \n" \ " br 5b \n" \ - "7: br 8f \n" \ + "7: subi %0, 4 \n" \ + "8: subi %0, 4 \n" \ + "12: subi %0, 4 \n" \ + " br 13f \n" \ ".section __ex_table, \"a\" \n" \ ".align 2 \n" \ - ".long 2b, 7b \n" \ - ".long 9b, 7b \n" \ - ".long 10b, 7b \n" \ + ".long 2b, 13f \n" \ + ".long 4b, 13f \n" \ + ".long 6b, 13f \n" \ + ".long 9b, 12b \n" \ + ".long 10b, 8b \n" \ ".long 11b, 7b \n" \ - ".long 4b, 7b \n" \ - ".long 6b, 7b \n" \ ".previous \n" \ - "8: \n" \ + "13: \n" \ : "=r"(n), "=r"(to), "=r"(from), "=r"(w0), \ "=r"(w1), "=r"(w2), "=r"(w3) \ : "0"(n), "1"(to), "2"(from) \ : "memory"); \ } while (0) -#define __copy_user_zeroing(to, from, n) \ +#define ___copy_from_user(to, from, n) \ do { \ int tmp; \ int nsave; \ @@ -356,22 +359,22 @@ do { \ " addi %1, 1 \n" \ " subi %0, 1 \n" \ " br 5b \n" \ - "8: mov %3, %0 \n" \ - " movi %4, 0 \n" \ - "9: stb %4, (%1, 0) \n" \ - " addi %1, 1 \n" \ - " subi %3, 1 \n" \ - " cmpnei %3, 0 \n" \ - " bt 9b \n" \ - " br 7f \n" \ + "8: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ + " bf 7f \n" \ + "9: subi %0, 8 \n" \ + " bf 7f \n" \ + "13: stw %3, (%1, 8) \n" \ + " subi %0, 12 \n" \ + " bf 7f \n" \ ".section __ex_table, \"a\" \n" \ ".align 2 \n" \ - ".long 2b, 8b \n" \ + ".long 2b, 7f \n" \ + ".long 4b, 7f \n" \ + ".long 6b, 7f \n" \ ".long 10b, 8b \n" \ - ".long 11b, 8b \n" \ - ".long 12b, 8b \n" \ - ".long 4b, 8b \n" \ - ".long 6b, 8b \n" \ + ".long 11b, 9b \n" \ + ".long 12b,13b \n" \ ".previous \n" \ "7: \n" \ : "=r"(n), "=r"(to), "=r"(from), "=r"(nsave), \ diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index 007706328000..ff908d28f0a0 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -170,8 +170,10 @@ csky_syscall_trace: ldw a3, (sp, LSAVE_A3) #if defined(__CSKYABIV2__) subi sp, 8 - stw r5, (sp, 0x4) - stw r4, (sp, 0x0) + ldw r9, (sp, LSAVE_A4) + stw r9, (sp, 0x0) + ldw r9, (sp, LSAVE_A5) + stw r9, (sp, 0x4) #else ldw r6, (sp, LSAVE_A4) ldw r7, (sp, LSAVE_A5) @@ -318,8 +320,6 @@ ENTRY(__switch_to) mfcr a2, psr /* Save PSR value */ stw a2, (a3, THREAD_SR) /* Save PSR in task struct */ - bclri a2, 6 /* Disable interrupts */ - mtcr a2, psr SAVE_SWITCH_STACK diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index e68ff375c8f8..ab55e98ee8f6 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -12,12 +12,17 @@ struct stackframe { static int unwind_frame_kernel(struct stackframe *frame) { - if (kstack_end((void *)frame->fp)) + unsigned long low = (unsigned long)task_stack_page(current); + unsigned long high = low + THREAD_SIZE; + + if (unlikely(frame->fp < low || frame->fp > high)) return -EPERM; - if (frame->fp & 0x3 || frame->fp < TASK_SIZE) + + if (kstack_end((void *)frame->fp) || frame->fp & 0x3) return -EPERM; *frame = *(struct stackframe *)frame->fp; + if (__kernel_text_address(frame->lr)) { int graph = 0; diff --git a/arch/csky/lib/usercopy.c b/arch/csky/lib/usercopy.c index 647a23986fb5..3c9bd645e643 100644 --- a/arch/csky/lib/usercopy.c +++ b/arch/csky/lib/usercopy.c @@ -7,10 +7,7 @@ unsigned long raw_copy_from_user(void *to, const void *from, unsigned long n) { - if (access_ok(from, n)) - __copy_user_zeroing(to, from, n); - else - memset(to, 0, n); + ___copy_from_user(to, from, n); return n; } EXPORT_SYMBOL(raw_copy_from_user); @@ -18,8 +15,7 @@ EXPORT_SYMBOL(raw_copy_from_user); unsigned long raw_copy_to_user(void *to, const void *from, unsigned long n) { - if (access_ok(to, n)) - __copy_user(to, from, n); + ___copy_to_user(to, from, n); return n; } EXPORT_SYMBOL(raw_copy_to_user); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 41204a49cf95..7b47a323dc23 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -274,8 +274,12 @@ enum emulation_result { #define MIPS3_PG_SHIFT 6 #define MIPS3_PG_FRAME 0x3fffffc0 +#if defined(CONFIG_64BIT) +#define VPN2_MASK GENMASK(cpu_vmbits - 1, 13) +#else #define VPN2_MASK 0xffffe000 -#define KVM_ENTRYHI_ASID MIPS_ENTRYHI_ASID +#endif +#define KVM_ENTRYHI_ASID cpu_asid_mask(&boot_cpu_data) #define TLB_IS_GLOBAL(x) ((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 5224fb38d766..01d7071b23f7 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -562,7 +562,7 @@ void __init mem_init(void) > BITS_PER_LONG); high_memory = __va((max_pfn << PAGE_SHIFT)); - set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1); + set_max_mapnr(max_low_pfn); memblock_free_all(); #ifdef CONFIG_PA11 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 497b7d0b2d7e..35608b9feb14 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -125,11 +125,12 @@ config PPC select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UACCESS_MCSAFE if PPC64 diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 3c0ba22dc360..db0a1c281587 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -75,7 +75,7 @@ .macro kuap_check current, gpr #ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr2, KUAP(thread) + lwz \gpr, KUAP(thread) 999: twnei \gpr, 0 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2f500debae21..0969285996cb 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -166,13 +166,17 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ - __chk_user_ptr(ptr); \ + __chk_user_ptr(__pu_addr); \ if (do_allow) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ else \ - __put_user_size_allowed((x), __pu_addr, (size), __pu_err); \ + __put_user_size_allowed(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -180,9 +184,13 @@ do { \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ might_fault(); \ - if (access_ok(__pu_addr, size)) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + if (access_ok(__pu_addr, __pu_size)) \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -190,8 +198,12 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ + __chk_user_ptr(__pu_addr); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -283,15 +295,18 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ if (do_allow) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ else \ - __get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -300,12 +315,15 @@ do { \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(size) __gu_size = (size); \ + \ might_fault(); \ - if (access_ok(__gu_addr, (size))) { \ + if (access_ok(__gu_addr, __gu_size)) { \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -314,10 +332,13 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c index e34116255ced..957abd592075 100644 --- a/arch/powerpc/kernel/ima_arch.c +++ b/arch/powerpc/kernel/ima_arch.c @@ -19,12 +19,12 @@ bool arch_ima_get_secureboot(void) * to be stored as an xattr or as an appended signature. * * To avoid duplicate signature verification as much as possible, the IMA - * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE + * policy rule for module appraisal is added only if CONFIG_MODULE_SIG * is not enabled. */ static const char *const secure_rules[] = { "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", -#ifndef CONFIG_MODULE_SIG_FORCE +#ifndef CONFIG_MODULE_SIG "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #endif NULL @@ -50,7 +50,7 @@ static const char *const secure_and_trusted_rules[] = { "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", "measure func=MODULE_CHECK template=ima-modsig", "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", -#ifndef CONFIG_MODULE_SIG_FORCE +#ifndef CONFIG_MODULE_SIG "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #endif NULL diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index a3951567118a..e7f8f9f1b3f4 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -218,11 +218,11 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) blr /* - * invalid clock + * syscall fallback */ 99: - li r3, EINVAL - crset so + li r0,__NR_clock_getres + sc blr .cfi_endproc V_FUNCTION_END(__kernel_clock_getres) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a32d478a7f41..b4c89a1acebb 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -303,12 +303,6 @@ SECTIONS *(.branch_lt) } -#ifdef CONFIG_DEBUG_INFO_BTF - .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { - *(.BTF) - } -#endif - .opd : AT(ADDR(.opd) - LOAD_OFFSET) { __start_opd = .; KEEP(*(.opd)) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 206156255247..7f3faf0dea25 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -60,6 +60,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -157,9 +158,9 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; @@ -190,7 +191,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -202,6 +203,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -213,7 +215,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -241,6 +243,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; st->current_flags = flag; st->level = level; } else { @@ -256,7 +259,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } @@ -273,7 +276,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } @@ -289,7 +292,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -308,7 +311,7 @@ static void walk_pagetables(struct pg_state *st) /* pgd exists */ walk_pud(st, pgd, addr); else - note_page(st, addr, 1, pgd_val(*pgd)); + note_page(st, addr, 1, pgd_val(*pgd), PGDIR_SIZE); } } @@ -363,7 +366,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index fe8d396e2301..16df9cc8f360 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/msi.h> +#include <linux/vmalloc.h> #include <asm/prom.h> #include <asm/io.h> @@ -1013,12 +1014,16 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { if (xd->eoi_mmio) { + unmap_kernel_range((unsigned long)xd->eoi_mmio, + 1u << xd->esb_shift); iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) xd->trig_mmio = NULL; xd->eoi_mmio = NULL; } if (xd->trig_mmio) { + unmap_kernel_range((unsigned long)xd->trig_mmio, + 1u << xd->esb_shift); iounmap(xd->trig_mmio); xd->trig_mmio = NULL; } diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index a131174a0a77..f310ad8ffcf7 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -11,13 +11,14 @@ config SOC_SIFIVE This enables support for SiFive SoC platform hardware. config SOC_VIRT - bool "QEMU Virt Machine" - select POWER_RESET_SYSCON - select POWER_RESET_SYSCON_POWEROFF - select GOLDFISH - select RTC_DRV_GOLDFISH - select SIFIVE_PLIC - help - This enables support for QEMU Virt Machine. + bool "QEMU Virt Machine" + select POWER_RESET + select POWER_RESET_SYSCON + select POWER_RESET_SYSCON_POWEROFF + select GOLDFISH + select RTC_DRV_GOLDFISH if RTC_CLASS + select SIFIVE_PLIC + help + This enables support for QEMU Virt Machine. endmenu diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index a2c809df2733..56053c9838b2 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -16,6 +16,8 @@ #ifndef CONFIG_MMU #define pgprot_noncached(x) (x) +#define pgprot_writecombine(x) (x) +#define pgprot_device(x) (x) #endif /* CONFIG_MMU */ /* Generic IO read/write. These perform native-endian accesses. */ diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 0234048b12bc..062efd3a1d5d 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -12,19 +12,14 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> +#ifdef CONFIG_RISCV_BASE_PMU #define RISCV_BASE_COUNTERS 2 /* * The RISCV_MAX_COUNTERS parameter should be specified. */ -#ifdef CONFIG_RISCV_BASE_PMU #define RISCV_MAX_COUNTERS 2 -#endif - -#ifndef RISCV_MAX_COUNTERS -#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU." -#endif /* * These are the indexes of bits in counteren register *minus* 1, @@ -82,6 +77,7 @@ struct riscv_pmu { int irq; }; +#endif #ifdef CONFIG_PERF_EVENTS #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs #endif diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 393f2014dfee..31d912944d8d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -460,12 +460,15 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #else /* CONFIG_MMU */ +#define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define VMALLOC_START 0 #define TASK_SIZE 0xffffffffUL +static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {} + #endif /* !CONFIG_MMU */ #define kern_addr_valid(addr) (1) /* FIXME */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f40205cb9a22..1dcc095dc23c 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -38,7 +38,7 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 0940681d2f68..19e46f4160cc 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -63,7 +63,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, #else /* !CONFIG_FRAME_POINTER */ -static void notrace walk_stackframe(struct task_struct *task, +void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) { unsigned long sp, pc; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 33b16f4212f7..a4ee3a0e7d20 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -33,15 +33,15 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) # We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. +# table and layout of the linked DSO. With ld --just-symbols we can then +# refer to these symbols in the kernel code rather than hand-coded addresses. SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ -Wl,--build-id -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold) -LDFLAGS_vdso-syms.o := -r -R +LDFLAGS_vdso-syms.o := -r --just-symbols $(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE $(call if_changed,ld) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fab855963c73..1dc26384a6c4 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -46,7 +46,7 @@ static void setup_zero_page(void) memset((void *)empty_zero_page, 0, PAGE_SIZE); } -#ifdef CONFIG_DEBUG_VM +#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM) static inline void print_mlk(char *name, unsigned long b, unsigned long t) { pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t, @@ -149,7 +149,8 @@ void __init setup_bootmem(void) memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = max_pfn; #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index cd060b5dd8fd..e4dc64cc9c55 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -8,6 +8,10 @@ #include <linux/slab.h> #include <asm/pci_insn.h> +/* I/O size constraints */ +#define ZPCI_MAX_READ_SIZE 8 +#define ZPCI_MAX_WRITE_SIZE 128 + /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 #define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL @@ -140,7 +144,8 @@ static inline int zpci_memcpy_fromio(void *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, - (u64) dst, n, 8); + (u64) dst, n, + ZPCI_MAX_READ_SIZE); rc = zpci_read_single(dst, src, size); if (rc) break; @@ -161,7 +166,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, - (u64) src, n, 128); + (u64) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = zpci_write_block(dst, src, size); else diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 8415ae7d2a23..f9e4baa64b67 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -151,7 +151,7 @@ static int kexec_file_add_initrd(struct kimage *image, buf.mem += crashk_res.start; buf.memsz = buf.bufsz; - data->parm->initrd_start = buf.mem; + data->parm->initrd_start = data->memsz; data->parm->initrd_size = buf.memsz; data->memsz += buf.memsz; diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index d5035de9020e..b7182cec48dc 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -28,6 +28,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, break; case R_390_64: /* Direct 64 bit. */ case R_390_GLOB_DAT: + case R_390_JMP_SLOT: *(u64 *)loc = val; break; case R_390_PC16: /* PC relative 16 bit. */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed52ffa8d5d4..560310e29e27 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -626,10 +626,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu) * available for the guest are AQIC and TAPQ with the t bit set * since we do not set IC.3 (FIII) we currently will only intercept * the AQIC function code. + * Note: running nested under z/VM can result in intercepts for other + * function codes, e.g. PQAP(QCI). We do not support this and bail out. */ reg0 = vcpu->run->s.regs.gprs[0]; fc = (reg0 >> 24) & 0xff; - if (WARN_ON_ONCE(fc != 0x03)) + if (fc != 0x03) return -EOPNOTSUPP; /* PQAP instruction is allowed for guest kernel only */ diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 5674710a4841..7dfae86afa47 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -159,10 +159,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ - if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; - else + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + if (likely(pte_present(pte))) + rste |= _REGION3_ENTRY_LARGE; + rste |= _REGION_ENTRY_TYPE_R3; + } else if (likely(pte_present(pte))) rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 0d3d8f170ea4..25208fa95426 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -309,14 +309,13 @@ out: int clp_disable_fh(struct zpci_dev *zdev) { - u32 fh = zdev->fh; int rc; if (!zdev_enabled(zdev)) return 0; rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 7d42a8794f10..020a2c514d96 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -11,6 +11,113 @@ #include <linux/mm.h> #include <linux/errno.h> #include <linux/pci.h> +#include <asm/pci_io.h> +#include <asm/pci_debug.h> + +static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset) +{ + struct { + u64 offset; + u8 cc; + u8 status; + } data = {offset, cc, status}; + + zpci_err_hex(&data, sizeof(data)); +} + +static inline int __pcistb_mio_inuser( + void __iomem *ioaddr, const void __user *src, + u64 len, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " sacf 256\n" + "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n" + "1: ipm %[cc]\n" + " srl %[cc],28\n" + "2: sacf 768\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : [cc] "+d" (cc), [len] "+d" (len) + : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src)) + : "cc", "memory"); + *status = len >> 24 & 0xff; + return cc; +} + +static inline int __pcistg_mio_inuser( + void __iomem *ioaddr, const void __user *src, + u64 ulen, u8 *status) +{ + register u64 addr asm("2") = (u64 __force) ioaddr; + register u64 len asm("3") = ulen; + int cc = -ENXIO; + u64 val = 0; + u64 cnt = ulen; + u8 tmp; + + /* + * copy 0 < @len <= 8 bytes from @src into the right most bytes of + * a register, then store it to PCI at @ioaddr while in secondary + * address space. pcistg then uses the user mappings. + */ + asm volatile ( + " sacf 256\n" + "0: llgc %[tmp],0(%[src])\n" + " sllg %[val],%[val],8\n" + " aghi %[src],1\n" + " ogr %[val],%[tmp]\n" + " brctg %[cnt],0b\n" + "1: .insn rre,0xb9d40000,%[val],%[ioaddr]\n" + "2: ipm %[cc]\n" + " srl %[cc],28\n" + "3: sacf 768\n" + EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) + : + [src] "+a" (src), [cnt] "+d" (cnt), + [val] "+d" (val), [tmp] "=d" (tmp), + [len] "+d" (len), [cc] "+d" (cc), + [ioaddr] "+a" (addr) + :: "cc", "memory"); + *status = len >> 24 & 0xff; + + /* did we read everything from user memory? */ + if (!cc && cnt != 0) + cc = -EFAULT; + + return cc; +} + +static inline int __memcpy_toio_inuser(void __iomem *dst, + const void __user *src, size_t n) +{ + int size, rc = 0; + u8 status = 0; + mm_segment_t old_fs; + + if (!src) + return -EINVAL; + + old_fs = enable_sacf_uaccess(); + while (n > 0) { + size = zpci_get_max_write_size((u64 __force) dst, + (u64 __force) src, n, + ZPCI_MAX_WRITE_SIZE); + if (size > 8) /* main path */ + rc = __pcistb_mio_inuser(dst, src, size, &status); + else + rc = __pcistg_mio_inuser(dst, src, size, &status); + if (rc) + break; + src += size; + dst += size; + n -= size; + } + disable_sacf_uaccess(old_fs); + if (rc) + zpci_err_mmio(rc, status, (__force u64) dst); + return rc; +} static long get_pfn(unsigned long user_addr, unsigned long access, unsigned long *pfn) @@ -46,6 +153,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) return -EINVAL; + + /* + * Only support read access to MIO capable devices on a MIO enabled + * system. Otherwise we would have to check for every address if it is + * a special ZPCI_ADDR and we would have to do a get_pfn() which we + * don't need for MIO capable devices. + */ + if (static_branch_likely(&have_mio)) { + ret = __memcpy_toio_inuser((void __iomem *) mmio_addr, + user_buffer, + length); + return ret; + } + if (length > 64) { buf = kmalloc(length, GFP_KERNEL); if (!buf) @@ -56,7 +177,8 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, ret = get_pfn(mmio_addr, VM_WRITE, &pfn); if (ret) goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | + (mmio_addr & ~PAGE_MASK)); ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) @@ -72,6 +194,78 @@ out: return ret; } +static inline int __pcilg_mio_inuser( + void __user *dst, const void __iomem *ioaddr, + u64 ulen, u8 *status) +{ + register u64 addr asm("2") = (u64 __force) ioaddr; + register u64 len asm("3") = ulen; + u64 cnt = ulen; + int shift = ulen * 8; + int cc = -ENXIO; + u64 val, tmp; + + /* + * read 0 < @len <= 8 bytes from the PCI memory mapped at @ioaddr (in + * user space) into a register using pcilg then store these bytes at + * user address @dst + */ + asm volatile ( + " sacf 256\n" + "0: .insn rre,0xb9d60000,%[val],%[ioaddr]\n" + "1: ipm %[cc]\n" + " srl %[cc],28\n" + " ltr %[cc],%[cc]\n" + " jne 4f\n" + "2: ahi %[shift],-8\n" + " srlg %[tmp],%[val],0(%[shift])\n" + "3: stc %[tmp],0(%[dst])\n" + " aghi %[dst],1\n" + " brctg %[cnt],2b\n" + "4: sacf 768\n" + EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) + : + [cc] "+d" (cc), [val] "=d" (val), [len] "+d" (len), + [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), + [shift] "+d" (shift) + : + [ioaddr] "a" (addr) + : "cc", "memory"); + + /* did we write everything to the user space buffer? */ + if (!cc && cnt != 0) + cc = -EFAULT; + + *status = len >> 24 & 0xff; + return cc; +} + +static inline int __memcpy_fromio_inuser(void __user *dst, + const void __iomem *src, + unsigned long n) +{ + int size, rc = 0; + u8 status; + mm_segment_t old_fs; + + old_fs = enable_sacf_uaccess(); + while (n > 0) { + size = zpci_get_max_write_size((u64 __force) src, + (u64 __force) dst, n, + ZPCI_MAX_READ_SIZE); + rc = __pcilg_mio_inuser(dst, src, size, &status); + if (rc) + break; + src += size; + dst += size; + n -= size; + } + disable_sacf_uaccess(old_fs); + if (rc) + zpci_err_mmio(rc, status, (__force u64) dst); + return rc; +} + SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, void __user *, user_buffer, size_t, length) { @@ -86,12 +280,27 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) return -EINVAL; + + /* + * Only support write access to MIO capable devices on a MIO enabled + * system. Otherwise we would have to check for every address if it is + * a special ZPCI_ADDR and we would have to do a get_pfn() which we + * don't need for MIO capable devices. + */ + if (static_branch_likely(&have_mio)) { + ret = __memcpy_fromio_inuser( + user_buffer, (const void __iomem *)mmio_addr, + length); + return ret; + } + if (length > 64) { buf = kmalloc(length, GFP_KERNEL); if (!buf) return -ENOMEM; - } else + } else { buf = local_buf; + } ret = get_pfn(mmio_addr, VM_READ, &pfn); if (ret) diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h index 3da561453260..ef01ced9e169 100644 --- a/arch/sh/include/uapi/asm/sockios.h +++ b/arch/sh/include/uapi/asm/sockios.h @@ -2,6 +2,8 @@ #ifndef __ASM_SH_SOCKIOS_H #define __ASM_SH_SOCKIOS_H +#include <linux/time_types.h> + /* Socket-level I/O control calls. */ #define FIOGETOWN _IOR('f', 123, int) #define FIOSETOWN _IOW('f', 124, int) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index f56c3c9a9793..80061bc93bdc 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -331,9 +331,9 @@ static void __init srmmu_nocache_init(void) while (vaddr < srmmu_nocache_end) { pgd = pgd_offset_k(vaddr); - p4d = p4d_offset(__nocache_fix(pgd), vaddr); - pud = pud_offset(__nocache_fix(p4d), vaddr); - pmd = pmd_offset(__nocache_fix(pgd), vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); + pmd = pmd_offset(__nocache_fix(pud), vaddr); pte = pte_offset_kernel(__nocache_fix(pmd), vaddr); pteval = ((paddr >> 4) | SRMMU_ET_PTE | SRMMU_PRIV); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index beea77046f9b..0bc9a74468be 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index 06ef2d4a4701..6737bcea1fa1 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state, const u32 inc) { /* SIMD disables preemption, so relax after processing each page. */ - BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); + BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { blake2s_compress_generic(state, block, nblocks, inc); return; } - for (;;) { + do { const size_t blocks = min_t(size_t, nblocks, - PAGE_SIZE / BLAKE2S_BLOCK_SIZE); + SZ_4K / BLAKE2S_BLOCK_SIZE); kernel_fpu_begin(); if (IS_ENABLED(CONFIG_AS_AVX512) && @@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state, kernel_fpu_end(); nblocks -= blocks; - if (!nblocks) - break; block += blocks * BLAKE2S_BLOCK_SIZE; - } + } while (nblocks); } EXPORT_SYMBOL(blake2s_compress_arch); diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 68a74953efaf..ebf2cd7ff2f0 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, bytes <= CHACHA_BLOCK_SIZE) return chacha_crypt_generic(state, dst, src, bytes, nrounds); - kernel_fpu_begin(); - chacha_dosimd(state, dst, src, bytes, nrounds); - kernel_fpu_end(); + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, todo, nrounds); + kernel_fpu_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); } EXPORT_SYMBOL(chacha_crypt_arch); diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index f7567cbd35b6..80fcb85736e1 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index a661ede3b5cf..cc6b7c1a2705 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 79bb58737d52..61b2bc8b6986 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, struct poly1305_arch_internal *state = ctx; /* SIMD disables preemption, so relax after processing each page. */ - BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || - PAGE_SIZE % POLY1305_BLOCK_SIZE); + BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || + SZ_4K % POLY1305_BLOCK_SIZE); if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || @@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, return; } - for (;;) { - const size_t bytes = min_t(size_t, len, PAGE_SIZE); + do { + const size_t bytes = min_t(size_t, len, SZ_4K); kernel_fpu_begin(); if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) @@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, else poly1305_blocks_avx(ctx, inp, bytes, padbit); kernel_fpu_end(); + len -= bytes; - if (!len) - break; inp += bytes; - } + } while (len); } static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 0789e13ece90..1c7f13bb6728 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 - /* - * Push registers and sanitize registers of values that a - * speculation attack might otherwise want to exploit. The - * lower registers are likely clobbered well before they - * could be put to use in a speculative execution gadget. - * Interleave XOR with PUSH for better uop scheduling: - */ .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ .endif pushq \rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ pushq %rcx /* pt_regs->cx */ - xorl %ecx, %ecx /* nospec cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret pushq %rsi /* return address on top of stack */ .endif + + /* + * Sanitize registers of values that a speculation attack might + * otherwise want to exploit. The lower registers are likely clobbered + * well before they could be put to use in a speculative execution + * gadget. + */ + xorl %edx, %edx /* nospec dx */ + xorl %ecx, %ecx /* nospec cx */ + xorl %r8d, %r8d /* nospec r8 */ + xorl %r9d, %r9d /* nospec r9 */ + xorl %r10d, %r10d /* nospec r10 */ + xorl %r11d, %r11d /* nospec r11 */ + xorl %ebx, %ebx /* nospec rbx */ + xorl %ebp, %ebp /* nospec rbp */ + xorl %r12d, %r12d /* nospec r12 */ + xorl %r13d, %r13d /* nospec r13 */ + xorl %r14d, %r14d /* nospec r14 */ + xorl %r15d, %r15d /* nospec r15 */ + .endm .macro POP_REGS pop_rdi=1 skip_r11rcx=0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f2bb91e87877..faa53fee0663 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) */ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ - UNWIND_HINT_EMPTY POP_REGS pop_rdi=0 skip_r11rcx=1 /* @@ -258,6 +257,7 @@ syscall_return_via_sysret: */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64) * %rdi: prev task * %rsi: next task */ -SYM_CODE_START(__switch_to_asm) - UNWIND_HINT_FUNC +SYM_FUNC_START(__switch_to_asm) /* * Save callee-saved registers * This must match the order in inactive_task_frame @@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm) popq %rbp jmp __switch_to -SYM_CODE_END(__switch_to_asm) +SYM_FUNC_END(__switch_to_asm) /* * A newly forked process directly context switches into this address. @@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start) * +----------------------------------------------------+ */ SYM_CODE_START(interrupt_entry) - UNWIND_HINT_FUNC + UNWIND_HINT_IRET_REGS offset=16 ASM_CLAC cld @@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry) pushq 5*8(%rdi) /* regs->eflags */ pushq 4*8(%rdi) /* regs->cs */ pushq 3*8(%rdi) /* regs->ip */ + UNWIND_HINT_IRET_REGS pushq 2*8(%rdi) /* regs->orig_ax */ pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC movq (%rdi), %rdi jmp 2f @@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ @@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit) movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp - UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE + UNWIND_HINT_REGS call do_exit SYM_CODE_END(rewind_stack_do_exit) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dff6623804c2..dae71ebfa709 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1892,8 +1892,8 @@ static __initconst const u64 tnt_hw_cache_extra_regs static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1), EVENT_EXTRA_END }; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 624f5d9b0f79..acf76b466db6 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -73,7 +73,8 @@ static int hv_cpu_init(unsigned int cpu) struct page *pg; input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - pg = alloc_page(GFP_KERNEL); + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ + pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); if (unlikely(!pg)) return -ENOMEM; *input_arg = page_address(pg); @@ -225,10 +226,18 @@ static int hv_cpu_die(unsigned int cpu) rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); if (re_ctrl.target_vp == hv_vp_index[cpu]) { - /* Reassign to some other online CPU */ + /* + * Reassign reenlightenment notifications to some other online + * CPU or just disable the feature if there are no online CPUs + * left (happens on hibernation). + */ new_cpu = cpumask_any_but(cpu_online_mask, cpu); - re_ctrl.target_vp = hv_vp_index[new_cpu]; + if (new_cpu < nr_cpu_ids) + re_ctrl.target_vp = hv_vp_index[new_cpu]; + else + re_ctrl.enabled = 0; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); } @@ -254,6 +263,7 @@ static int __init hv_pci_init(void) static int hv_suspend(void) { union hv_x64_msr_hypercall_contents hypercall_msr; + int ret; /* * Reset the hypercall page as it is going to be invalidated @@ -270,12 +280,17 @@ static int hv_suspend(void) hypercall_msr.enable = 0; wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - return 0; + ret = hv_cpu_die(0); + return ret; } static void hv_resume(void) { union hv_x64_msr_hypercall_contents hypercall_msr; + int ret; + + ret = hv_cpu_init(0); + WARN_ON(ret); /* Re-enable the hypercall page */ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -286,8 +301,16 @@ static void hv_resume(void) hv_hypercall_pg = hv_hypercall_pg_saved; hv_hypercall_pg_saved = NULL; + + /* + * Reenlightenment notifications are disabled by hv_cpu_die(0), + * reenable them here if hv_reenlightenment_cb was previously set. + */ + if (hv_reenlightenment_cb) + set_hv_tscchange_cb(hv_reenlightenment_cb); } +/* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */ static struct syscore_ops hv_syscore_ops = { .suspend = hv_suspend, .resume = hv_resume, diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 31c379c1da41..0c814cd9ea42 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -9,6 +9,36 @@ #include <linux/mod_devicetable.h> +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) + +/** + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Backport version to keep the SRBDS pile consistant. No shorter variants + * required for this. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + /* * Match specific microcode revisions. * diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f3327cb56edf..69f7dcb1fa5c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -360,6 +360,7 @@ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ @@ -404,5 +405,6 @@ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 00f7cf45e699..8e95aa4b0d17 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -74,7 +74,7 @@ #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) #ifdef CONFIG_X86_32 /* The maximum address that we can perform a DMA transfer to on this platform */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 85be2f506272..89af0d2c62aa 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,12 @@ struct dyn_arch_ftrace { #ifndef __ASSEMBLY__ +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +extern void set_ftrace_ops_ro(void); +#else +static inline void set_ftrace_ops_ro(void) { } +#endif + #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 07344d82e88e..ac1a99ffbd8d 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -17,7 +17,7 @@ struct task_struct; #ifdef CONFIG_X86_IOPL_IOPERM void io_bitmap_share(struct task_struct *tsk); -void io_bitmap_exit(void); +void io_bitmap_exit(struct task_struct *tsk); void native_tss_update_io_bitmap(void); @@ -29,7 +29,7 @@ void native_tss_update_io_bitmap(void); #else static inline void io_bitmap_share(struct task_struct *tsk) { } -static inline void io_bitmap_exit(void) { } +static inline void io_bitmap_exit(struct task_struct *tsk) { } static inline void tss_update_io_bitmap(void) { } #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d79b40cd8283..c121b8f24597 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -574,6 +574,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; @@ -1664,8 +1665,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ - return (irq->delivery_mode == dest_Fixed || - irq->delivery_mode == dest_LowestPrio); + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d5e517d1c3dd..af64c8e80ff4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -119,6 +119,10 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 64a03f226ab7..dca64a2dda9c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -256,6 +256,7 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 64c3dce374e5..688d6f1e7a63 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -83,28 +83,35 @@ int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; #ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). + */ +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { unsigned long decoy_addr; int rc; /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() + * This relies on set_memory_XX() properly sanitizing any __pa() * results with __PHYSICAL_MASK or PTE_PFN_MASK. */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - rc = set_memory_uc(decoy_addr, 1); + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 91e29b6a86a5..9804a7957f4e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -55,8 +55,13 @@ /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 499578f7e6d7..70fc159ebe69 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -19,7 +19,7 @@ struct unwind_state { #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; - struct pt_regs *regs; + struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index 196fdd02b8b1..be5e2e747f50 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -2,8 +2,15 @@ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H -/* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000UL +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ +#define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ # ifdef __i386__ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5f973fed3c9f..e289722b04f6 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * According to Intel, MFENCE can do the serialization here. */ asm volatile("mfence" : : : "memory"); - - printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -552,7 +550,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); #define DEADLINE_MODEL_MATCH_REV(model, rev) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } -static u32 hsx_deadline_rev(void) +static __init u32 hsx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ @@ -562,7 +560,7 @@ static u32 hsx_deadline_rev(void) return ~0U; } -static u32 bdx_deadline_rev(void) +static __init u32 bdx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; @@ -574,7 +572,7 @@ static u32 bdx_deadline_rev(void) return ~0U; } -static u32 skx_deadline_rev(void) +static __init u32 skx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; @@ -587,7 +585,7 @@ static u32 skx_deadline_rev(void) return ~0U; } -static const struct x86_cpu_id deadline_match[] = { +static const struct x86_cpu_id deadline_match[] __initconst = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev), @@ -609,18 +607,19 @@ static const struct x86_cpu_id deadline_match[] = { {}, }; -static void apic_check_deadline_errata(void) +static __init bool apic_validate_deadline_timer(void) { const struct x86_cpu_id *m; u32 rev; - if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || - boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return; + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; m = x86_match_cpu(deadline_match); if (!m) - return; + return true; /* * Function pointers will have the MSB set due to address layout, @@ -632,11 +631,12 @@ static void apic_check_deadline_errata(void) rev = (u32)m->driver_data; if (boot_cpu_data.microcode >= rev) - return; + return true; setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " "please update microcode to version: 0x%x (or later)\n", rev); + return false; } /* @@ -2098,7 +2098,8 @@ void __init init_apic_mappings(void) { unsigned int new_apicid; - apic_check_deadline_errata(); + if (apic_validate_deadline_timer()) + pr_debug("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1f875fbe1384..f04cc01e629e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1111,8 +1111,7 @@ static const int amd_erratum_383[] = /* #1054: Instructions Retired Performance Counter May Be Inaccurate */ static const int amd_erratum_1054[] = - AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); - + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ed54b3b21c39..b53dcff21438 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -41,6 +41,7 @@ static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); static void __init mds_print_mitigation(void); static void __init taa_select_mitigation(void); +static void __init srbds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -108,6 +109,7 @@ void __init check_bugs(void) l1tf_select_mitigation(); mds_select_mitigation(); taa_select_mitigation(); + srbds_select_mitigation(); /* * As MDS and TAA mitigations are inter-related, print MDS @@ -398,6 +400,97 @@ static int __init tsx_async_abort_parse_cmdline(char *str) early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "SRBDS: " fmt + +enum srbds_mitigations { + SRBDS_MITIGATION_OFF, + SRBDS_MITIGATION_UCODE_NEEDED, + SRBDS_MITIGATION_FULL, + SRBDS_MITIGATION_TSX_OFF, + SRBDS_MITIGATION_HYPERVISOR, +}; + +static enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL; + +static const char * const srbds_strings[] = { + [SRBDS_MITIGATION_OFF] = "Vulnerable", + [SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [SRBDS_MITIGATION_FULL] = "Mitigation: Microcode", + [SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled", + [SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", +}; + +static bool srbds_off; + +void update_srbds_msr(void) +{ + u64 mcu_ctrl; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; + + if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + + switch (srbds_mitigation) { + case SRBDS_MITIGATION_OFF: + case SRBDS_MITIGATION_TSX_OFF: + mcu_ctrl |= RNGDS_MITG_DIS; + break; + case SRBDS_MITIGATION_FULL: + mcu_ctrl &= ~RNGDS_MITG_DIS; + break; + default: + break; + } + + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); +} + +static void __init srbds_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + /* + * Check to see if this is one of the MDS_NO systems supporting + * TSX that are only exposed to SRBDS when TSX is enabled. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; + else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; + else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) + srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED; + else if (cpu_mitigations_off() || srbds_off) + srbds_mitigation = SRBDS_MITIGATION_OFF; + + update_srbds_msr(); + pr_info("%s\n", srbds_strings[srbds_mitigation]); +} + +static int __init srbds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return 0; + + srbds_off = !strcmp(str, "off"); + return 0; +} +early_param("srbds", srbds_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -495,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline); static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = +static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; #ifdef CONFIG_RETPOLINE @@ -641,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } - /* - * At this point, an STIBP mode other than "off" has been set. - * If STIBP support is not being forced, check if STIBP always-on - * is preferred. - */ - if (mode != SPECTRE_V2_USER_STRICT && - boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) - mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); @@ -672,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); + + spectre_v2_user_ibpb = mode; } - /* If enhanced IBRS is enabled no STIBP required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * required. + */ + if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIBP - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. + */ + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + /* + * If STIBP is not available, clear the STIBP mode. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + if (!boot_cpu_has(X86_FEATURE_STIBP)) mode = SPECTRE_V2_USER_NONE; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { @@ -921,7 +1020,7 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: @@ -1164,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; /* * Indirect branch speculation is always disabled in strict - * mode. + * mode. It can neither be enabled if it was force-disabled + * by a previous prctl call. + */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + task_spec_ib_force_disable(task)) return -EPERM; task_clear_spec_ib_disable(task); task_update_spec_tif(task); @@ -1182,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. */ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return 0; task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) @@ -1216,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1247,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - return PR_SPEC_DISABLE; - default: + } else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1501,7 +1610,7 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: @@ -1528,6 +1637,11 @@ static char *ibpb_state(void) return ""; } +static ssize_t srbds_show_state(char *buf) +{ + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1572,6 +1686,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITLB_MULTIHIT: return itlb_multihit_show_state(buf); + case X86_BUG_SRBDS: + return srbds_show_state(buf); + default: break; } @@ -1618,4 +1735,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr { return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); } + +ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4cdb123ff66a..0567448124e1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1075,9 +1075,30 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; -static bool __init cpu_matches(unsigned long which) +#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + +#define SRBDS BIT(0) + +static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + {} +}; + +static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) { - const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); + const struct x86_cpu_id *m = x86_match_cpu(table); return m && !!(m->driver_data & which); } @@ -1097,31 +1118,34 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) u64 ia32_cap = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ - if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && + !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); - if (cpu_matches(NO_SPECULATION)) + if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - if (!cpu_matches(NO_SPECTRE_V2)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && + !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && + !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); - if (cpu_matches(MSBDS_ONLY)) + if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); } - if (!cpu_matches(NO_SWAPGS)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); /* @@ -1139,7 +1163,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); - if (cpu_matches(NO_MELTDOWN)) + /* + * SRBDS affects CPUs which support RDRAND or RDSEED and are listed + * in the vulnerability blacklist. + */ + if ((cpu_has(c, X86_FEATURE_RDRAND) || + cpu_has(c, X86_FEATURE_RDSEED)) && + cpu_matches(cpu_vuln_blacklist, SRBDS)) + setup_force_cpu_bug(X86_BUG_SRBDS); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1148,7 +1181,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (cpu_matches(NO_L1TF)) + if (cpu_matches(cpu_vuln_whitelist, NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); @@ -1589,6 +1622,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); + update_srbds_msr(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 37fdefd14f28..fb538fccd24c 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -77,6 +77,7 @@ extern void detect_ht(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 6dd78d8235e4..2f163e6646b6 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -34,13 +34,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + for (m = match; + m->vendor | m->family | m->model | m->steppings | m->feature; + m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) continue; if (m->model != X86_MODEL_ANY && c->x86_model != m->model) continue; + if (m->steppings != X86_STEPPING_ANY && + !(BIT(c->x86_stepping) & m->steppings)) + continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; return m; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2c4f949611e4..410d3868bf33 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -527,6 +527,13 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool whole_page(struct mce *m) +{ + if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV)) + return true; + return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT; +} + bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) @@ -598,7 +605,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, pfn = mce->addr >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) - set_mce_nospec(pfn); + set_mce_nospec(pfn, whole_page(mce)); return NOTIFY_OK; } @@ -1096,7 +1103,7 @@ static int do_memory_failure(struct mce *m) if (ret) pr_err("Memory error not recovered"); else - set_mce_nospec(m->addr >> PAGE_SHIFT); + set_mce_nospec(m->addr >> PAGE_SHIFT, whole_page(m)); return ret; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a1806598aaa4..cf2f2a85f087 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -954,18 +954,31 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) return true; } -/* - * This is similar to user_regset_copyout(), but will not add offset to - * the source data pointer or increment pos, count, kbuf, and ubuf. - */ -static inline void -__copy_xstate_to_kernel(void *kbuf, const void *data, - unsigned int offset, unsigned int size, unsigned int size_total) +static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count) { - if (offset < size_total) { - unsigned int copy = min(size, size_total - offset); + if (*pos < to) { + unsigned size = to - *pos; + + if (size > *count) + size = *count; + memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size); + *kbuf += size; + *pos += size; + *count -= size; + } +} - memcpy(kbuf + offset, data, copy); +static void copy_part(unsigned offset, unsigned size, void *from, + void **kbuf, unsigned *pos, unsigned *count) +{ + fill_gap(offset, kbuf, pos, count); + if (size > *count) + size = *count; + if (size) { + memcpy(*kbuf, from, size); + *kbuf += size; + *pos += size; + *count -= size; } } @@ -978,8 +991,9 @@ __copy_xstate_to_kernel(void *kbuf, const void *data, */ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { - unsigned int offset, size; struct xstate_header header; + const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr); + unsigned count = size_total; int i; /* @@ -995,46 +1009,42 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of header.xfeatures = xsave->header.xfeatures; header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + if (header.xfeatures & XFEATURE_MASK_FP) + copy_part(0, off_mxcsr, + &xsave->i387, &kbuf, &offset_start, &count); + if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)) + copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE, + &xsave->i387.mxcsr, &kbuf, &offset_start, &count); + if (header.xfeatures & XFEATURE_MASK_FP) + copy_part(offsetof(struct fxregs_state, st_space), 128, + &xsave->i387.st_space, &kbuf, &offset_start, &count); + if (header.xfeatures & XFEATURE_MASK_SSE) + copy_part(xstate_offsets[XFEATURE_MASK_SSE], 256, + &xsave->i387.xmm_space, &kbuf, &offset_start, &count); + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + copy_part(offsetof(struct fxregs_state, sw_reserved), 48, + xstate_fx_sw_bytes, &kbuf, &offset_start, &count); /* * Copy xregs_state->header: */ - offset = offsetof(struct xregs_state, header); - size = sizeof(header); - - __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); + copy_part(offsetof(struct xregs_state, header), sizeof(header), + &header, &kbuf, &offset_start, &count); - for (i = 0; i < XFEATURE_MAX; i++) { + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { /* * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { void *src = __raw_xsave_addr(xsave, i); - offset = xstate_offsets[i]; - size = xstate_sizes[i]; - - /* The next component has to fit fully into the output buffer: */ - if (offset + size > size_total) - break; - - __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); + copy_part(xstate_offsets[i], xstate_sizes[i], + src, &kbuf, &offset_start, &count); } } - - if (xfeatures_mxcsr_quirk(header.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); - } - - /* - * Fill xsave->i387.sw_reserved value for ptrace frame: - */ - offset = offsetof(struct fxregs_state, sw_reserved); - size = sizeof(xstate_fx_sw_bytes); - - __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); + fill_gap(size_total, &kbuf, &offset_start, &count); return 0; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 37a0aeaf89e7..b0e641793be4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -407,7 +407,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) set_vm_flush_reset_perms(trampoline); - set_memory_ro((unsigned long)trampoline, npages); + if (likely(system_state != SYSTEM_BOOTING)) + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: @@ -415,6 +416,32 @@ fail: return 0; } +void set_ftrace_ops_ro(void) +{ + struct ftrace_ops *ops; + unsigned long start_offset; + unsigned long end_offset; + unsigned long npages; + unsigned long size; + + do_for_each_ftrace_op(ops, ftrace_ops_list) { + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + continue; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + end_offset = (unsigned long)ftrace_regs_caller_end; + } else { + start_offset = (unsigned long)ftrace_caller; + end_offset = (unsigned long)ftrace_epilogue; + } + size = end_offset - start_offset; + size = size + RET_SIZE + sizeof(void *); + npages = DIV_ROUND_UP(size, PAGE_SIZE); + set_memory_ro((unsigned long)ops->trampoline, npages); + } while_for_each_ftrace_op(ops); +} + static unsigned long calc_trampoline_call_offset(bool save_regs) { unsigned long start_offset; diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 8abeee0dd7bf..fce678f4471e 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -32,15 +32,15 @@ void io_bitmap_share(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } -static void task_update_io_bitmap(void) +static void task_update_io_bitmap(struct task_struct *tsk) { - struct thread_struct *t = ¤t->thread; + struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { /* TSS update is handled on exit to user space */ - set_thread_flag(TIF_IO_BITMAP); + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } else { - clear_thread_flag(TIF_IO_BITMAP); + clear_tsk_thread_flag(tsk, TIF_IO_BITMAP); /* Invalidate TSS */ preempt_disable(); tss_update_io_bitmap(); @@ -48,12 +48,12 @@ static void task_update_io_bitmap(void) } } -void io_bitmap_exit(void) +void io_bitmap_exit(struct task_struct *tsk) { - struct io_bitmap *iobm = current->thread.io_bitmap; + struct io_bitmap *iobm = tsk->thread.io_bitmap; - current->thread.io_bitmap = NULL; - task_update_io_bitmap(); + tsk->thread.io_bitmap = NULL; + task_update_io_bitmap(tsk); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -101,7 +101,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) if (!iobm) return -ENOMEM; refcount_set(&iobm->refcnt, 1); - io_bitmap_exit(); + io_bitmap_exit(current); } /* @@ -133,7 +133,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) } /* All permissions dropped? */ if (max_long == UINT_MAX) { - io_bitmap_exit(); + io_bitmap_exit(current); return 0; } @@ -191,7 +191,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) } t->iopl_emul = level; - task_update_io_bitmap(); + task_update_io_bitmap(current); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3053c85e0e42..3d88300ec306 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -97,7 +97,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) } /* - * Free current thread data structures etc.. + * Free thread data structures etc.. */ void exit_thread(struct task_struct *tsk) { @@ -105,7 +105,7 @@ void exit_thread(struct task_struct *tsk) struct fpu *fpu = &t->fpu; if (test_thread_flag(TIF_IO_BITMAP)) - io_bitmap_exit(); + io_bitmap_exit(tsk); free_vm86(t); @@ -546,28 +546,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, lockdep_assert_irqs_disabled(); - /* - * If TIF_SSBD is different, select the proper mitigation - * method. Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0cc7c0b106bb..762f5c1465a6 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 69881b2d446c..9674321ce3a3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -262,6 +262,14 @@ static void notrace start_secondary(void *unused) wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + + /* + * Prevent tail call to cpu_startup_entry() because the stack protector + * guard has been changed a couple of function calls up, in + * boot_init_stack_canary() and must not be checked before tail calling + * another function. + */ + prevent_tail_call_optimization(); } /** diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index d8673d8a779b..36a585b80d9e 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,10 +25,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index e9cc182aa97e..1a90abeca5f3 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -142,9 +142,6 @@ static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; - if (!orc_init) - return NULL; - if (ip == 0) return &null_orc_entry; @@ -317,12 +314,19 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { + struct task_struct *task = state->task; + if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; + if (task != current && state->sp == task->thread.sp) { + struct inactive_task_frame *frame = (void *)task->thread.sp; + return &frame->ret_addr; + } + if (state->sp) return (unsigned long *)state->sp - 1; @@ -381,9 +385,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr return true; } +/* + * If state->regs is non-NULL, and points to a full pt_regs, just get the reg + * value from state->regs. + * + * Otherwise, if state->regs just points to IRET regs, and the previous frame + * had full regs, it's safe to get the value from the previous regs. This can + * happen when early/late IRQ entry code gets interrupted by an NMI. + */ +static bool get_reg(struct unwind_state *state, unsigned int reg_off, + unsigned long *val) +{ + unsigned int reg = reg_off/8; + + if (!state->regs) + return false; + + if (state->full_regs) { + *val = ((unsigned long *)state->regs)[reg]; + return true; + } + + if (state->prev_regs) { + *val = ((unsigned long *)state->prev_regs)[reg]; + return true; + } + + return false; +} + bool unwind_next_frame(struct unwind_state *state) { - unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp; + unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; bool indirect = false; @@ -445,39 +478,35 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_REG_R10: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) { orc_warn("missing regs for base reg R10 at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->r10; break; case ORC_REG_R13: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) { orc_warn("missing regs for base reg R13 at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->r13; break; case ORC_REG_DI: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) { orc_warn("missing regs for base reg DI at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->di; break; case ORC_REG_DX: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) { orc_warn("missing regs for base reg DX at ip %pB\n", (void *)state->ip); goto err; } - sp = state->regs->dx; break; default: @@ -504,6 +533,7 @@ bool unwind_next_frame(struct unwind_state *state) state->sp = sp; state->regs = NULL; + state->prev_regs = NULL; state->signal = false; break; @@ -515,6 +545,7 @@ bool unwind_next_frame(struct unwind_state *state) } state->regs = (struct pt_regs *)sp; + state->prev_regs = NULL; state->full_regs = true; state->signal = true; break; @@ -526,6 +557,8 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } + if (state->full_regs) + state->prev_regs = state->regs; state->regs = (void *)sp - IRET_FRAME_OFFSET; state->full_regs = false; state->signal = true; @@ -534,14 +567,14 @@ bool unwind_next_frame(struct unwind_state *state) default: orc_warn("unknown .orc_unwind entry type %d for ip %pB\n", orc->type, (void *)orig_ip); - break; + goto err; } /* Find BP: */ switch (orc->bp_reg) { case ORC_REG_UNDEFINED: - if (state->regs && state->full_regs) - state->bp = state->regs->bp; + if (get_reg(state, offsetof(struct pt_regs, bp), &tmp)) + state->bp = tmp; break; case ORC_REG_PREV_SP: @@ -588,17 +621,20 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, memset(state, 0, sizeof(*state)); state->task = task; + if (!orc_init) + goto err; + /* * Refuse to unwind the stack of a task while it's executing on another * CPU. This check is racy, but that's ok: the unwinder has other * checks to prevent it from going off the rails. */ if (task_on_another_cpu(task)) - goto done; + goto err; if (regs) { if (user_mode(regs)) - goto done; + goto the_end; state->ip = regs->ip; state->sp = regs->sp; @@ -631,6 +667,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, * generate some kind of backtrace if this happens. */ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); + state->error = true; if (get_stack_info(next_page, state->task, &state->stack_info, &state->stack_mask)) return; @@ -651,13 +688,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Otherwise, skip ahead to the user-specified starting frame: */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->sp <= (unsigned long)first_frame)) + state->sp < (unsigned long)first_frame)) unwind_next_frame(state); return; -done: +err: + state->error = true; +the_end: state->stack_info.type = STACK_TYPE_UNKNOWN; - return; } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e3296aa028fe..ccb2dec210ef 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -39,13 +39,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif +jiffies = jiffies_64; + #if defined(CONFIG_X86_64) /* * On 64-bit, align RODATA to 2MB so we retain large page mappings for diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 750ff0b29404..d057376bd3d3 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, } /* - * AMD SVM AVIC accelerate EOI write and do not trap, - * in-kernel IOAPIC will not be able to receive the EOI. - * In this case, we do lazy update of the pending EOI when - * trying to set IOAPIC irq. + * AMD SVM AVIC accelerate EOI write iff the interrupt is edge + * triggered, in which case the in-kernel IOAPIC will not be able + * to receive the EOI. In this case, we do a lazy update of the + * pending EOI when trying to set IOAPIC irq. */ - if (kvm_apicv_activated(ioapic->kvm)) + if (edge && kvm_apicv_activated(ioapic->kvm)) ioapic_lazy_update_eoi(ioapic, irq); /* diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 87e9ba27ada1..ea6fa05e2fd9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -343,6 +343,8 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; @@ -591,16 +593,15 @@ static void kvm_mmu_reset_all_pte_masks(void) * the most significant bits of legal physical address space. */ shadow_nonpresent_or_rsvd_mask = 0; - low_phys_bits = boot_cpu_data.x86_cache_bits; - if (boot_cpu_data.x86_cache_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) { + low_phys_bits = boot_cpu_data.x86_phys_bits; + if (boot_cpu_has_bug(X86_BUG_L1TF) && + !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= + 52 - shadow_nonpresent_or_rsvd_mask_len)) { + low_phys_bits = boot_cpu_data.x86_cache_bits + - shadow_nonpresent_or_rsvd_mask_len; shadow_nonpresent_or_rsvd_mask = - rsvd_bits(boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len, - boot_cpu_data.x86_cache_bits - 1); - low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; - } else - WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF)); + rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); + } shadow_nonpresent_or_rsvd_lower_gfn_mask = GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); @@ -6131,25 +6132,16 @@ static void kvm_set_mmio_spte_mask(void) u64 mask; /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. - */ - - /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. + * Set a reserved PA bit in MMIO SPTEs to generate page faults with + * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT + * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports + * 52-bit physical addresses then there are no reserved PA bits in the + * PTEs and so the reserved PA approach must be disabled. */ - mask = 1ull << 51; - - /* Set the present bit. */ - mask |= 1ull; - - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. - */ - if (shadow_phys_bits == 52) - mask &= ~1ull; + if (shadow_phys_bits < 52) + mask = BIT_ULL(51) | PT_PRESENT_MASK; + else + mask = 0; kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 451377533bcb..eee7cb0e1d95 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1886,7 +1886,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); + npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; @@ -3236,8 +3236,8 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_HOST; break; case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) + /* Trap async PF even if not shadowing */ + if (!npt_enabled || svm->vcpu.arch.apf.host_apf_reason) return NESTED_EXIT_HOST; break; default: @@ -3326,7 +3326,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr dst->iopm_base_pa = from->iopm_base_pa; dst->msrpm_base_pa = from->msrpm_base_pa; dst->tsc_offset = from->tsc_offset; - dst->asid = from->asid; + /* asid not copied, it is handled manually for svm->vmcb. */ dst->tlb_ctl = from->tlb_ctl; dst->int_ctl = from->int_ctl; dst->int_vector = from->int_vector; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index eec7b2d93104..a03db4a75977 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -303,7 +303,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) cpu = get_cpu(); prev = vmx->loaded_vmcs; vmx->loaded_vmcs = vmcs; - vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, prev); vmx_sync_vmcs_host_state(vmx, prev); put_cpu(); @@ -5504,6 +5504,23 @@ static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, return 1 & (b >> (field & 7)); } +static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) +{ + u32 entry_intr_info = vmcs12->vm_entry_intr_info_field; + + if (nested_cpu_has_mtf(vmcs12)) + return true; + + /* + * An MTF VM-exit may be injected into the guest by setting the + * interruption-type to 7 (other event) and the vector field to 0. Such + * is the case regardless of the 'monitor trap flag' VM-execution + * control. + */ + return entry_intr_info == (INTR_INFO_VALID_MASK + | INTR_TYPE_OTHER_EVENT); +} + /* * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we * should handle it ourselves in L0 (and then continue L2). Only call this @@ -5545,7 +5562,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) vmcs_read32(VM_EXIT_INTR_ERROR_CODE), KVM_ISA_VMX); - switch (exit_reason) { + switch ((u16)exit_reason) { case EXIT_REASON_EXCEPTION_NMI: if (is_nmi(intr_info)) return false; @@ -5618,7 +5635,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); case EXIT_REASON_MONITOR_TRAP_FLAG: - return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG); + return nested_vmx_exit_handled_mtf(vmcs12); case EXIT_REASON_MONITOR_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); case EXIT_REASON_PAUSE_INSTRUCTION: diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 09b0937d56b1..19717d0a1100 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -12,6 +12,7 @@ #define __ex(x) __kvm_handle_fault_on_reboot(x) +asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, bool fault); void vmwrite_error(unsigned long field, unsigned long value); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 861ae40e7144..99410f372c41 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -86,6 +86,9 @@ SYM_FUNC_START(vmx_vmexit) /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ + or $1, %_ASM_AX + pop %_ASM_AX .Lvmexit_skip_rsb: #endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c1ffe7d24f83..b29902c521f2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1314,10 +1314,12 @@ after_clear_sn: pi_set_on(pi_desc); } -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + struct loaded_vmcs *buddy) { struct vcpu_vmx *vmx = to_vmx(vcpu); bool already_loaded = vmx->loaded_vmcs->cpu == cpu; + struct vmcs *prev; if (!already_loaded) { loaded_vmcs_clear(vmx->loaded_vmcs); @@ -1336,10 +1338,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) local_irq_enable(); } - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + prev = per_cpu(current_vmcs, cpu); + if (prev != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); - indirect_branch_prediction_barrier(); + + /* + * No indirect branch prediction barrier needed when switching + * the active VMCS within a guest, e.g. on nested VM-Enter. + * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + */ + if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) + indirect_branch_prediction_barrier(); } if (!already_loaded) { @@ -1376,11 +1386,10 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); - vmx->host_pkru = read_pkru(); vmx->host_debugctlmsr = get_debugctlmsr(); } @@ -6538,11 +6547,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_load_guest_xsave_state(vcpu); - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); @@ -6631,18 +6635,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_exit(vmx); - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - kvm_load_host_xsave_state(vcpu); vmx->nested.nested_run_pending = 0; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index e64da06c7009..ff7361aa824c 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -320,7 +320,8 @@ struct kvm_vmx { }; bool nested_vmx_allowed(struct kvm_vcpu *vcpu); -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu); +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + struct loaded_vmcs *buddy); void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); int allocate_vpid(void); void free_vpid(int vpid); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 17650bda4331..4b4a8a4e0251 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -809,11 +809,25 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) vcpu->arch.ia32_xss != host_xss) wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); } + + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) && + vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.pkru); } EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { + vcpu->arch.pkru = rdpkru(); + if (vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.host_pkru); + } + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { if (vcpu->arch.xcr0 != host_xcr0) @@ -3529,6 +3543,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); + /* Save host pkru register if supported */ + vcpu->arch.host_pkru = read_pkru(); + /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -3722,7 +3739,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) + if (!bank_num || bank_num > KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) goto out; @@ -4551,7 +4568,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (kvm_state.flags & ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE - | KVM_STATE_NESTED_EVMCS)) + | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING)) break; /* nested_run_pending implies guest_mode. */ @@ -6891,7 +6908,7 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r && ctxt->tf) + if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) r = kvm_vcpu_do_singlestep(vcpu); if (kvm_x86_ops->update_emulated_instruction) kvm_x86_ops->update_emulated_instruction(vcpu); @@ -8098,9 +8115,8 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, - bool blockable) +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { unsigned long apic_address; @@ -8111,8 +8127,6 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (start <= apic_address && apic_address < end) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); - - return 0; } void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 69309cd56fdf..33093fdedb02 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr) (void *)st->start_address); } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +static void effective_prot(struct ptdump_state *pt_st, int level, u64 val) { - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t prot = val & PTE_FLAGS_MASK; + pgprotval_t effective; + + if (level > 0) { + pgprotval_t higher_prot = st->prot_levels[level - 1]; + + effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) | + ((higher_prot | prot) & _PAGE_NX); + } else { + effective = prot; + } + + st->prot_levels[level] = effective; } /* @@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, struct seq_file *m = st->seq; new_prot = val & PTE_FLAGS_MASK; - - if (level > 0) { - new_eff = effective_prot(st->prot_levels[level - 1], - new_prot); - } else { - new_eff = new_prot; - } - - if (level >= 0) - st->prot_levels[level] = new_eff; + if (!val) + new_eff = 0; + else + new_eff = st->prot_levels[level]; /* * If we have a "break" in the series, we need to flush the state that @@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, struct pg_state st = { .ptdump = { .note_page = note_page, + .effective_prot = effective_prot, .range = ptdump_ranges }, .level = -1, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index abbdecb75fad..023e1ec5e153 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -54,6 +54,7 @@ #include <asm/init.h> #include <asm/uv/uv.h> #include <asm/setup.h> +#include <asm/ftrace.h> #include "mm_internal.h" @@ -1288,6 +1289,8 @@ void mark_rodata_ro(void) all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); + set_ftrace_ops_ro(); + #ifdef CONFIG_CPA_DEBUG printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); set_memory_rw(start, (end-start) >> PAGE_SHIFT); diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 673de6063345..92530af38b09 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -372,7 +372,7 @@ static void enter_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL && + if (!cpumask_available(downed_cpus) && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { pr_notice("Failed to allocate mask\n"); goto out; @@ -402,7 +402,7 @@ static void leave_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) + if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0) return; pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index c4aedd00c1ba..7ab317e3184e 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -42,7 +42,8 @@ struct cpa_data { unsigned long pfn; unsigned int flags; unsigned int force_split : 1, - force_static_prot : 1; + force_static_prot : 1, + force_flush_all : 1; struct page **pages; }; @@ -352,10 +353,10 @@ static void cpa_flush(struct cpa_data *data, int cache) return; } - if (cpa->numpages <= tlb_single_page_flush_ceiling) - on_each_cpu(__cpa_flush_tlb, cpa, 1); - else + if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) flush_tlb_all(); + else + on_each_cpu(__cpa_flush_tlb, cpa, 1); if (!cache) return; @@ -1595,6 +1596,8 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + cpa->force_flush_all = 1; + ret = __change_page_attr_set_clr(&alias_cpa, 0); if (ret) return ret; @@ -1615,6 +1618,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + cpa->force_flush_all = 1; /* * The high mapping range is imprecise, so ignore the * return value. diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index e723559c386a..0c67a5a94de3 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -572,6 +572,10 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa1ec, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa1ed, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa26c, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa26d, pci_invalid_bar); /* * Device [1022:7808] diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 802ee5bba66c..0cebe5db691d 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -92,6 +92,7 @@ asmlinkage __visible void cpu_bringup_and_idle(void) cpu_bringup(); boot_init_stack_canary(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + prevent_tail_call_optimization(); } void xen_smp_intr_free_pv(unsigned int cpu) |