aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap4
-rw-r--r--Documentation/accounting/psi.txt12
-rw-r--r--Documentation/bpf/btf.rst8
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.yaml2
-rw-r--r--Documentation/devicetree/bindings/hwmon/adc128d818.txt4
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-xscale.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-mtk.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-stu300.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-wmt.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-vt8500.txt)0
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca8k.txt73
-rw-r--r--Documentation/devicetree/bindings/serial/mtk-uart.txt1
-rw-r--r--Documentation/filesystems/mount_api.txt367
-rw-r--r--Documentation/i2c/busses/i2c-i8011
-rw-r--r--Documentation/lzo.txt8
-rw-r--r--Documentation/networking/bpf_flow_dissector.rst126
-rw-r--r--Documentation/networking/index.rst1
-rw-r--r--Documentation/networking/msg_zerocopy.rst2
-rw-r--r--Documentation/networking/netdev-FAQ.rst13
-rw-r--r--Documentation/networking/nf_flowtable.txt8
-rw-r--r--Documentation/networking/snmp_counter.rst12
-rw-r--r--Documentation/powerpc/DAWR-POWER9.txt32
-rw-r--r--Documentation/virtual/kvm/api.txt91
-rw-r--r--Documentation/virtual/kvm/devices/vm.txt3
-rw-r--r--Documentation/virtual/kvm/devices/xive.txt197
-rw-r--r--Documentation/virtual/kvm/mmu.txt11
-rw-r--r--MAINTAINERS16
-rw-r--r--Makefile49
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/alpha/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/arc/include/asm/Kbuild1
-rw-r--r--arch/arc/include/asm/syscall.h7
-rw-r--r--arch/arc/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/dts/am335x-evm.dts26
-rw-r--r--arch/arm/boot/dts/am335x-evmsk.dts26
-rw-r--r--arch/arm/boot/dts/am33xx-l4.dtsi4
-rw-r--r--arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts2
-rw-r--r--arch/arm/boot/dts/imx6dl-yapp4-common.dtsi6
-rw-r--r--arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi4
-rw-r--r--arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi1
-rw-r--r--arch/arm/boot/dts/imx6ull-pinfunc-snvs.h2
-rw-r--r--arch/arm/boot/dts/rk3288-tinker.dtsi3
-rw-r--r--arch/arm/boot/dts/rk3288-veyron.dtsi2
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi20
-rw-r--r--arch/arm/boot/dts/sama5d2-pinfunc.h2
-rw-r--r--arch/arm/configs/imx_v4_v5_defconfig3
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/arm/include/asm/kvm_mmu.h11
-rw-r--r--arch/arm/include/asm/stage2_pgtable.h2
-rw-r--r--arch/arm/include/asm/syscall.h47
-rw-r--r--arch/arm/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/arm/mach-at91/pm.c6
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6q.c27
-rw-r--r--arch/arm/mach-imx/mach-imx51.c1
-rw-r--r--arch/arm/mach-iop13xx/setup.c8
-rw-r--r--arch/arm/mach-iop13xx/tpmi.c10
-rw-r--r--arch/arm/mach-milbeaut/platsmp.c4
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c2
-rw-r--r--arch/arm/mach-omap2/display.c4
-rw-r--r--arch/arm/plat-iop/adma.c6
-rw-r--r--arch/arm/plat-orion/common.c4
-rw-r--r--arch/arm64/Kconfig.platforms1
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi3
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186.dtsi1
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774c0.dtsi7
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77990.dtsi7
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock64.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi58
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts1
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h11
-rw-r--r--arch/arm64/include/asm/syscall.h46
-rw-r--r--arch/arm64/kernel/sdei.c6
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/arm64/kvm/reset.c6
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/asm/syscall.h79
-rw-r--r--arch/c6x/include/uapi/asm/Kbuild1
-rw-r--r--arch/csky/include/asm/syscall.h26
-rw-r--r--arch/h8300/include/asm/Kbuild1
-rw-r--r--arch/h8300/include/asm/syscall.h34
-rw-r--r--arch/h8300/include/uapi/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/syscall.h4
-rw-r--r--arch/hexagon/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/ia64/include/asm/Kbuild1
-rw-r--r--arch/ia64/include/asm/syscall.h13
-rw-r--r--arch/ia64/include/uapi/asm/Kbuild1
-rw-r--r--arch/ia64/kernel/ptrace.c7
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/uapi/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/syscall.h8
-rw-r--r--arch/microblaze/include/uapi/asm/Kbuild1
-rw-r--r--arch/mips/configs/generic/board-ocelot.config8
-rw-r--r--arch/mips/include/asm/syscall.h3
-rw-r--r--arch/mips/kernel/kgdb.c3
-rw-r--r--arch/mips/kernel/ptrace.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c3
-rw-r--r--arch/nds32/include/asm/syscall.h62
-rw-r--r--arch/nios2/include/asm/Kbuild1
-rw-r--r--arch/nios2/include/asm/syscall.h84
-rw-r--r--arch/nios2/include/uapi/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/syscall.h12
-rw-r--r--arch/openrisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/Kbuild1
-rw-r--r--arch/parisc/include/asm/ptrace.h5
-rw-r--r--arch/parisc/include/asm/syscall.h30
-rw-r--r--arch/parisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/parisc/kernel/process.c6
-rw-r--r--arch/parisc/kernel/setup.c3
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h8
-rw-r--r--arch/powerpc/include/asm/kvm_host.h11
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h41
-rw-r--r--arch/powerpc/include/asm/opal-api.h7
-rw-r--r--arch/powerpc/include/asm/opal.h7
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h2
-rw-r--r--arch/powerpc/include/asm/syscall.h15
-rw-r--r--arch/powerpc/include/asm/xive.h17
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h46
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c62
-rw-r--r--arch/powerpc/kernel/kvm.c7
-rw-r--r--arch/powerpc/kernel/process.c9
-rw-r--r--arch/powerpc/kernel/ptrace.c3
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c42
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c102
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c105
-rw-r--r--arch/powerpc/kvm/book3s_hv.c159
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c57
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c144
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S109
-rw-r--r--arch/powerpc/kvm/book3s_xive.c250
-rw-r--r--arch/powerpc/kvm/book3s_xive.h37
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c1249
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c78
-rw-r--r--arch/powerpc/kvm/powerpc.c40
-rw-r--r--arch/powerpc/lib/memcmp_64.S17
-rw-r--r--arch/powerpc/net/bpf_jit.h17
-rw-r--r--arch/powerpc/net/bpf_jit32.h4
-rw-r--r--arch/powerpc/net/bpf_jit64.h20
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c12
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c3
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c27
-rw-r--r--arch/powerpc/platforms/pseries/ras.c1
-rw-r--r--arch/powerpc/sysdev/xive/native.c110
-rw-r--r--arch/riscv/include/asm/fixmap.h2
-rw-r--r--arch/riscv/include/asm/syscall.h24
-rw-r--r--arch/riscv/include/asm/uaccess.h2
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/module.c2
-rw-r--r--arch/riscv/kernel/setup.c8
-rw-r--r--arch/riscv/mm/Makefile6
-rw-r--r--arch/riscv/mm/init.c28
-rw-r--r--arch/s390/include/asm/ap.h11
-rw-r--r--arch/s390/include/asm/cpacf.h1
-rw-r--r--arch/s390/include/asm/elf.h11
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/include/asm/lowcore.h61
-rw-r--r--arch/s390/include/asm/syscall.h28
-rw-r--r--arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c19
-rw-r--r--arch/s390/kernel/smp.c3
-rw-r--r--arch/s390/kernel/vtime.c19
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/interrupt.c11
-rw-r--r--arch/s390/kvm/kvm-s390.c120
-rw-r--r--arch/s390/kvm/vsie.c13
-rw-r--r--arch/s390/tools/gen_facilities.c3
-rw-r--r--arch/sh/boards/of-generic.c4
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sh/include/asm/syscall_32.h47
-rw-r--r--arch/sh/include/asm/syscall_64.h8
-rw-r--r--arch/sh/include/uapi/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/syscall.h11
-rw-r--r--arch/sparc/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/um/include/asm/syscall-generic.h78
-rw-r--r--arch/unicore32/include/asm/Kbuild1
-rw-r--r--arch/unicore32/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/boot/compressed/misc.h4
-rw-r--r--arch/x86/events/intel/core.c6
-rw-r--r--arch/x86/include/asm/cpufeature.h5
-rw-r--r--arch/x86/include/asm/e820/api.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h32
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/realmode.h6
-rw-r--r--arch/x86/include/asm/syscall.h142
-rw-r--r--arch/x86/include/asm/xen/hypercall.h3
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c3
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kvm/cpuid.c12
-rw-r--r--arch/x86/kvm/emulate.c191
-rw-r--r--arch/x86/kvm/hyperv.c33
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h42
-rw-r--r--arch/x86/kvm/lapic.c42
-rw-r--r--arch/x86/kvm/mmu.c90
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/mmutrace.h4
-rw-r--r--arch/x86/kvm/mtrr.c10
-rw-r--r--arch/x86/kvm/paging_tmpl.h38
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/svm.c233
-rw-r--r--arch/x86/kvm/trace.h4
-rw-r--r--arch/x86/kvm/vmx/capabilities.h2
-rw-r--r--arch/x86/kvm/vmx/nested.c472
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c8
-rw-r--r--arch/x86/kvm/vmx/vmx.c94
-rw-r--r--arch/x86/kvm/vmx/vmx.h14
-rw-r--r--arch/x86/kvm/x86.c322
-rw-r--r--arch/x86/kvm/x86.h12
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/platform/efi/quirks.c2
-rw-r--r--arch/x86/realmode/init.c11
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--arch/xtensa/include/asm/processor.h21
-rw-r--r--arch/xtensa/include/asm/syscall.h33
-rw-r--r--arch/xtensa/include/uapi/asm/Kbuild1
-rw-r--r--arch/xtensa/kernel/entry.S6
-rw-r--r--arch/xtensa/kernel/stacktrace.c6
-rw-r--r--arch/xtensa/mm/mmu.c2
-rw-r--r--block/bfq-iosched.c2
-rw-r--r--block/bfq-wf2q.c2
-rw-r--r--block/blk-core.c4
-rw-r--r--block/blk-flush.c4
-rw-r--r--block/blk-mq-sched.c8
-rw-r--r--block/blk-mq.c145
-rw-r--r--block/blk-mq.h15
-rw-r--r--drivers/acpi/acpica/evgpe.c6
-rw-r--r--drivers/acpi/bus.c3
-rw-r--r--drivers/acpi/cppc_acpi.c9
-rw-r--r--drivers/android/binder.c3
-rw-r--r--drivers/android/binder_alloc.c18
-rw-r--r--drivers/ata/libata-zpodd.c34
-rw-r--r--drivers/block/null_blk_main.c5
-rw-r--r--drivers/block/paride/pcd.c14
-rw-r--r--drivers/block/paride/pf.c12
-rw-r--r--drivers/block/xsysace.c2
-rw-r--r--drivers/block/zram/zram_drv.c32
-rw-r--r--drivers/bluetooth/btusb.c2
-rw-r--r--drivers/char/Kconfig2
-rw-r--r--drivers/char/tpm/eventlog/tpm2.c4
-rw-r--r--drivers/char/tpm/tpm-dev-common.c9
-rw-r--r--drivers/char/tpm/tpm-interface.c14
-rw-r--r--drivers/cpufreq/intel_pstate.c10
-rw-r--r--drivers/cpufreq/scpi-cpufreq.c2
-rw-r--r--drivers/crypto/caam/caamhash.c13
-rw-r--r--drivers/dma/stm32-mdma.c4
-rw-r--r--drivers/gpio/gpio-adnp.c6
-rw-r--r--drivers/gpio/gpio-aspeed.c2
-rw-r--r--drivers/gpio/gpio-exar.c2
-rw-r--r--drivers/gpio/gpio-mockup.c10
-rw-r--r--drivers/gpio/gpiolib-of.c17
-rw-r--r--drivers/gpio/gpiolib.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c6
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c20
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h5
-rw-r--r--drivers/gpu/drm/drm_drv.c6
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c2
-rw-r--r--drivers/gpu/drm/drm_file.c6
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c8
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c16
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h1
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c33
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c5
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h3
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h4
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c2
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c9
-rw-r--r--drivers/gpu/drm/meson/meson_dw_hdmi.c3
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c18
-rw-r--r--drivers/gpu/drm/tegra/hub.c4
-rw-r--r--drivers/gpu/drm/tegra/vic.c2
-rw-r--r--drivers/gpu/drm/udl/udl_connector.c72
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.c6
-rw-r--r--drivers/gpu/drm/vkms/vkms_gem.c5
-rw-r--r--drivers/hid/Kconfig1
-rw-r--r--drivers/hid/hid-core.c6
-rw-r--r--drivers/hid/hid-debug.c5
-rw-r--r--drivers/hid/hid-ids.h1
-rw-r--r--drivers/hid/hid-input.c1
-rw-r--r--drivers/hid/hid-logitech-hidpp.c13
-rw-r--r--drivers/hid/hid-quirks.c11
-rw-r--r--drivers/hid/hid-steam.c26
-rw-r--r--drivers/hid/hid-uclogic-params.c4
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-core.c2
-rw-r--r--drivers/hwmon/Kconfig1
-rw-r--r--drivers/hwmon/ntc_thermistor.c2
-rw-r--r--drivers/hwmon/occ/common.c6
-rw-r--r--drivers/i2c/busses/Kconfig1
-rw-r--r--drivers/i2c/busses/i2c-i801.c4
-rw-r--r--drivers/i2c/busses/i2c-imx.c4
-rw-r--r--drivers/iommu/amd_iommu.c9
-rw-r--r--drivers/iommu/amd_iommu_init.c7
-rw-r--r--drivers/iommu/amd_iommu_types.h2
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c19
-rw-r--r--drivers/iommu/iommu.c8
-rw-r--r--drivers/isdn/hardware/mISDN/hfcmulti.c3
-rw-r--r--drivers/leds/leds-pca9532.c8
-rw-r--r--drivers/leds/trigger/ledtrig-netdev.c16
-rw-r--r--drivers/md/dm-core.h1
-rw-r--r--drivers/md/dm-init.c2
-rw-r--r--drivers/md/dm-integrity.c16
-rw-r--r--drivers/md/dm-rq.c11
-rw-r--r--drivers/md/dm-table.c39
-rw-r--r--drivers/md/dm.c30
-rw-r--r--drivers/mfd/Kconfig2
-rw-r--r--drivers/mfd/sprd-sc27xx-spi.c42
-rw-r--r--drivers/mfd/twl-core.c23
-rw-r--r--drivers/misc/habanalabs/command_submission.c6
-rw-r--r--drivers/misc/habanalabs/debugfs.c7
-rw-r--r--drivers/misc/habanalabs/device.c71
-rw-r--r--drivers/misc/habanalabs/goya/goya.c65
-rw-r--r--drivers/misc/habanalabs/habanalabs.h21
-rw-r--r--drivers/misc/habanalabs/hw_queue.c5
-rw-r--r--drivers/misc/habanalabs/memory.c38
-rw-r--r--drivers/misc/habanalabs/mmu.c6
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0002.c6
-rw-r--r--drivers/net/Kconfig4
-rw-r--r--drivers/net/bonding/bond_sysfs_slave.c4
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c24
-rw-r--r--drivers/net/dsa/qca8k.c174
-rw-r--r--drivers/net/dsa/qca8k.h13
-rw-r--r--drivers/net/ethernet/3com/3c515.c2
-rw-r--r--drivers/net/ethernet/8390/mac8390.c19
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c16
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c8
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h4
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c14
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c20
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c30
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c2
-rw-r--r--drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c9
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c15
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c53
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h12
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c12
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c13
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile2
-rw-r--r--drivers/net/ethernet/hisilicon/hns_mdio.c18
-rw-r--r--drivers/net/ethernet/ibm/ehea/ehea_main.c1
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c5
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h16
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c28
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c3
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_defines.h2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c57
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c82
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c2
-rw-r--r--drivers/net/ethernet/micrel/ks8851.c42
-rw-r--r--drivers/net/ethernet/micrel/ks8851.h93
-rw-r--r--drivers/net/ethernet/micrel/ks8851_mll.c317
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c3
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h3
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c27
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_repr.c4
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic.h2
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c2
-rw-r--r--drivers/net/ethernet/realtek/atp.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c16
-rw-r--r--drivers/net/ethernet/sis/sis900.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/descs_com.h22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/norm_desc.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/ring_mode.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c48
-rw-r--r--drivers/net/ethernet/ti/netcp_ethss.c8
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c2
-rw-r--r--drivers/net/hyperv/hyperv_net.h1
-rw-r--r--drivers/net/hyperv/netvsc.c6
-rw-r--r--drivers/net/hyperv/netvsc_drv.c32
-rw-r--r--drivers/net/ieee802154/adf7242.c4
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c2
-rw-r--r--drivers/net/phy/Kconfig3
-rw-r--r--drivers/net/phy/broadcom.c13
-rw-r--r--drivers/net/phy/dp83822.c34
-rw-r--r--drivers/net/phy/meson-gxl.c6
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/tun.c16
-rw-r--r--drivers/net/usb/aqc111.c15
-rw-r--r--drivers/net/usb/cdc_ether.c8
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/vrf.c9
-rw-r--r--drivers/net/vxlan.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mac80211.c18
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76.h4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/beacon.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/dma.c17
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/init.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/mac.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/main.c16
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/mcu.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/soc.c4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/usb.c10
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02.h11
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c27
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_mac.c67
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_mac.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c82
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_phy.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_util.c14
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/init.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h1
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c21
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/phy.c30
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/usb.c7
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c1
-rw-r--r--drivers/net/wireless/mediatek/mt76/tx.c11
-rw-r--r--drivers/net/wireless/mediatek/mt76/usb.c6
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/usb.c4
-rw-r--r--drivers/nvme/host/multipath.c5
-rw-r--r--drivers/nvme/host/tcp.c2
-rw-r--r--drivers/nvme/target/core.c4
-rw-r--r--drivers/nvme/target/io-cmd-file.c20
-rw-r--r--drivers/parisc/iosapic.c6
-rw-r--r--drivers/parport/daisy.c32
-rw-r--r--drivers/parport/probe.c2
-rw-r--r--drivers/parport/share.c10
-rw-r--r--drivers/pci/pci.h1
-rw-r--r--drivers/pci/pcie/bw_notification.c23
-rw-r--r--drivers/pci/probe.c2
-rw-r--r--drivers/phy/allwinner/phy-sun4i-usb.c5
-rw-r--r--drivers/reset/reset-meson-audio-arb.c1
-rw-r--r--drivers/rtc/Kconfig4
-rw-r--r--drivers/rtc/rtc-cros-ec.c4
-rw-r--r--drivers/rtc/rtc-da9063.c7
-rw-r--r--drivers/rtc/rtc-sh.c2
-rw-r--r--drivers/s390/cio/chsc.c13
-rw-r--r--drivers/s390/cio/vfio_ccw_drv.c8
-rw-r--r--drivers/s390/crypto/ap_bus.c19
-rw-r--r--drivers/s390/crypto/ap_bus.h2
-rw-r--r--drivers/s390/crypto/ap_queue.c26
-rw-r--r--drivers/s390/crypto/zcrypt_api.c30
-rw-r--r--drivers/s390/net/qeth_core_main.c5
-rw-r--r--drivers/s390/net/qeth_l2_main.c7
-rw-r--r--drivers/s390/net/qeth_l3_main.c8
-rw-r--r--drivers/s390/scsi/zfcp_erp.c17
-rw-r--r--drivers/s390/scsi/zfcp_ext.h2
-rw-r--r--drivers/s390/scsi/zfcp_fc.c21
-rw-r--r--drivers/s390/scsi/zfcp_scsi.c4
-rw-r--r--drivers/scsi/aacraid/aacraid.h7
-rw-r--r--drivers/scsi/aacraid/commsup.c4
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.c39
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.h7
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c7
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c6
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c12
-rw-r--r--drivers/scsi/qedi/qedi_main.c7
-rw-r--r--drivers/scsi/qla4xxx/ql4_os.c2
-rw-r--r--drivers/scsi/scsi_devinfo.c1
-rw-r--r--drivers/scsi/scsi_dh.c1
-rw-r--r--drivers/scsi/scsi_sysfs.c6
-rw-r--r--drivers/scsi/sd.c22
-rw-r--r--drivers/scsi/storvsc_drv.c15
-rw-r--r--drivers/soc/bcm/bcm2835-power.c49
-rw-r--r--drivers/staging/Kconfig2
-rw-r--r--drivers/staging/Makefile1
-rw-r--r--drivers/staging/axis-fifo/Kconfig1
-rw-r--r--drivers/staging/comedi/comedidev.h2
-rw-r--r--drivers/staging/comedi/drivers.c33
-rw-r--r--drivers/staging/comedi/drivers/ni_mio_common.c10
-rw-r--r--drivers/staging/erofs/dir.c45
-rw-r--r--drivers/staging/erofs/unzip_vle.c45
-rw-r--r--drivers/staging/erofs/unzip_vle_lz4.c7
-rw-r--r--drivers/staging/mt7621-dts/gbpc1.dts29
-rw-r--r--drivers/staging/mt7621-dts/mt7621.dtsi73
-rw-r--r--drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt48
-rw-r--r--drivers/staging/mt7621-eth/Kconfig39
-rw-r--r--drivers/staging/mt7621-eth/Makefile14
-rw-r--r--drivers/staging/mt7621-eth/TODO13
-rw-r--r--drivers/staging/mt7621-eth/ethtool.c250
-rw-r--r--drivers/staging/mt7621-eth/ethtool.h15
-rw-r--r--drivers/staging/mt7621-eth/gsw_mt7620.h277
-rw-r--r--drivers/staging/mt7621-eth/gsw_mt7621.c297
-rw-r--r--drivers/staging/mt7621-eth/mdio.c275
-rw-r--r--drivers/staging/mt7621-eth/mdio.h27
-rw-r--r--drivers/staging/mt7621-eth/mdio_mt7620.c173
-rw-r--r--drivers/staging/mt7621-eth/mtk_eth_soc.c2176
-rw-r--r--drivers/staging/mt7621-eth/mtk_eth_soc.h716
-rw-r--r--drivers/staging/mt7621-eth/soc_mt7621.c161
-rw-r--r--drivers/staging/mt7621-pci/Kconfig1
-rw-r--r--drivers/staging/octeon/ethernet-mdio.c2
-rw-r--r--drivers/staging/octeon/ethernet.c40
-rw-r--r--drivers/staging/octeon/octeon-ethernet.h4
-rw-r--r--drivers/staging/olpc_dcon/olpc_dcon_xo_1.c2
-rw-r--r--drivers/staging/rtl8188eu/core/rtw_xmit.c9
-rw-r--r--drivers/staging/rtl8188eu/include/rtw_xmit.h2
-rw-r--r--drivers/staging/rtl8712/rtl8712_cmd.c10
-rw-r--r--drivers/staging/rtl8712/rtl8712_cmd.h2
-rw-r--r--drivers/staging/rtl8723bs/core/rtw_xmit.c14
-rw-r--r--drivers/staging/rtl8723bs/include/rtw_xmit.h2
-rw-r--r--drivers/staging/rtlwifi/phydm/rtl_phydm.c2
-rw-r--r--drivers/staging/rtlwifi/rtl8822be/fw.c2
-rw-r--r--drivers/staging/speakup/speakup_soft.c16
-rw-r--r--drivers/staging/speakup/spk_priv.h1
-rw-r--r--drivers/staging/speakup/synth.c6
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c8
-rw-r--r--drivers/staging/vt6655/device_main.c11
-rw-r--r--drivers/tty/serial/ar933x_uart.c24
-rw-r--r--drivers/tty/serial/atmel_serial.c52
-rw-r--r--drivers/tty/serial/kgdboc.c4
-rw-r--r--drivers/tty/serial/max310x.c2
-rw-r--r--drivers/tty/serial/mvebu-uart.c3
-rw-r--r--drivers/tty/serial/mxs-auart.c4
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c2
-rw-r--r--drivers/tty/serial/sc16is7xx.c12
-rw-r--r--drivers/tty/serial/sh-sci.c12
-rw-r--r--drivers/tty/tty_port.c10
-rw-r--r--drivers/usb/class/cdc-acm.c4
-rw-r--r--drivers/usb/common/common.c2
-rw-r--r--drivers/usb/core/hcd.c3
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c4
-rw-r--r--drivers/usb/gadget/function/f_hid.c6
-rw-r--r--drivers/usb/gadget/udc/net2272.c1
-rw-r--r--drivers/usb/gadget/udc/net2280.c8
-rw-r--r--drivers/usb/host/u132-hcd.c3
-rw-r--r--drivers/usb/host/xhci-dbgcap.c5
-rw-r--r--drivers/usb/host/xhci-hub.c19
-rw-r--r--drivers/usb/host/xhci-rcar.c1
-rw-r--r--drivers/usb/host/xhci-ring.c9
-rw-r--r--drivers/usb/host/xhci.h8
-rw-r--r--drivers/usb/misc/usb251xb.c4
-rw-r--r--drivers/usb/mtu3/Kconfig1
-rw-r--r--drivers/usb/serial/cp210x.c1
-rw-r--r--drivers/usb/serial/ftdi_sio.c2
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h4
-rw-r--r--drivers/usb/serial/mos7720.c4
-rw-r--r--drivers/usb/serial/option.c17
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c27
-rw-r--r--drivers/usb/typec/tcpm/wcove.c9
-rw-r--r--drivers/vfio/pci/vfio_pci.c4
-rw-r--r--drivers/vfio/vfio_iommu_spapr_tce.c2
-rw-r--r--drivers/vfio/vfio_iommu_type1.c14
-rw-r--r--drivers/virt/vboxguest/vboxguest_core.c106
-rw-r--r--drivers/virt/vboxguest/vboxguest_core.h15
-rw-r--r--drivers/virt/vboxguest/vboxguest_linux.c26
-rw-r--r--drivers/virt/vboxguest/vboxguest_utils.c32
-rw-r--r--drivers/virt/vboxguest/vboxguest_version.h9
-rw-r--r--drivers/virt/vboxguest/vmmdev.h8
-rw-r--r--drivers/xen/privcmd-buf.c3
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c4
-rw-r--r--fs/afs/fsclient.c6
-rw-r--r--fs/afs/yfsclient.c2
-rw-r--r--fs/aio.c338
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/raid56.c3
-rw-r--r--fs/btrfs/transaction.c49
-rw-r--r--fs/btrfs/tree-log.c33
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsglob.h8
-rw-r--r--fs/cifs/connect.c30
-rw-r--r--fs/cifs/smb2file.c6
-rw-r--r--fs/cifs/smb2ops.c68
-rw-r--r--fs/cifs/smb2pdu.c49
-rw-r--r--fs/cifs/smb2proto.h5
-rw-r--r--fs/debugfs/inode.c13
-rw-r--r--fs/fs_parser.c2
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/io_uring.c27
-rw-r--r--fs/jffs2/readinode.c5
-rw-r--r--fs/jffs2/super.c5
-rw-r--r--fs/lockd/host.c3
-rw-r--r--fs/locks.c5
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c5
-rw-r--r--fs/nfs/nfs4proc.c5
-rw-r--r--fs/ocfs2/refcounttree.c42
-rw-r--r--fs/open.c24
-rw-r--r--fs/proc/base.c17
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/read_write.c5
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c15
-rw-r--r--fs/xfs/scrub/btree.c11
-rw-r--r--fs/xfs/scrub/dabtree.c5
-rw-r--r--fs/xfs/xfs_discard.c8
-rw-r--r--fs/xfs/xfs_file.c27
-rw-r--r--include/acpi/acoutput.h3
-rw-r--r--include/acpi/platform/aclinux.h5
-rw-r--r--include/asm-generic/syscall.h21
-rw-r--r--include/dt-bindings/reset/amlogic,meson-g12a-reset.h5
-rw-r--r--include/keys/trusted.h2
-rw-r--r--include/linux/atalk.h2
-rw-r--r--include/linux/bitrev.h46
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/bpf_verifier.h40
-rw-r--r--include/linux/brcmphy.h16
-rw-r--r--include/linux/device.h2
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/kcore.h11
-rw-r--r--include/linux/kvm_host.h58
-rw-r--r--include/linux/list.h2
-rw-r--r--include/linux/memcontrol.h5
-rw-r--r--include/linux/mii.h2
-rw-r--r--include/linux/mlx5/driver.h2
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/net.h6
-rw-r--r--include/linux/page-isolation.h10
-rw-r--r--include/linux/parport.h13
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--include/linux/platform_data/gpio/gpio-amd-fch.h2
-rw-r--r--include/linux/ptrace.h11
-rw-r--r--include/linux/sched/signal.h18
-rw-r--r--include/linux/slab.h2
-rw-r--r--include/linux/socket.h12
-rw-r--r--include/linux/string.h3
-rw-r--r--include/linux/vbox_utils.h12
-rw-r--r--include/net/act_api.h9
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/net/netns/hash.h10
-rw-r--r--include/net/nfc/nci_core.h2
-rw-r--r--include/net/sch_generic.h45
-rw-r--r--include/net/sctp/checksum.h2
-rw-r--r--include/net/sock.h6
-rw-r--r--include/net/tc_act/tc_gact.h2
-rw-r--r--include/net/xdp_sock.h1
-rw-r--r--include/trace/events/syscalls.h2
-rw-r--r--include/uapi/linux/Kbuild2
-rw-r--r--include/uapi/linux/bpf.h188
-rw-r--r--include/uapi/linux/ethtool.h2
-rw-r--r--include/uapi/linux/kvm.h14
-rw-r--r--include/uapi/linux/vbox_vmmdev_types.h60
-rw-r--r--kernel/bpf/cpumap.c13
-rw-r--r--kernel/bpf/inode.c32
-rw-r--r--kernel/bpf/syscall.c22
-rw-r--r--kernel/bpf/verifier.c159
-rw-r--r--kernel/cpu.c20
-rw-r--r--kernel/ptrace.c15
-rw-r--r--kernel/seccomp.c2
-rw-r--r--kernel/signal.c13
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/trace/ftrace.c12
-rw-r--r--kernel/trace/trace_dynevent.c2
-rw-r--r--kernel/trace/trace_events_hist.c1
-rw-r--r--kernel/trace/trace_syscalls.c9
-rw-r--r--kernel/watchdog.c6
-rw-r--r--lib/iov_iter.c4
-rw-r--r--lib/lzo/lzo1x_compress.c9
-rw-r--r--lib/lzo/lzo1x_decompress_safe.c4
-rw-r--r--lib/rhashtable.c8
-rw-r--r--lib/sbitmap.c11
-rw-r--r--lib/string.c20
-rw-r--r--lib/syscall.c57
-rw-r--r--mm/compaction.c29
-rw-r--r--mm/debug.c4
-rw-r--r--mm/huge_memory.c36
-rw-r--r--mm/kasan/kasan.h5
-rw-r--r--mm/kmemleak.c16
-rw-r--r--mm/memcontrol.c20
-rw-r--r--mm/memory.c11
-rw-r--r--mm/memory_hotplug.c19
-rw-r--r--mm/mempolicy.c40
-rw-r--r--mm/migrate.c11
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/page_isolation.c51
-rw-r--r--mm/slab.c5
-rw-r--r--mm/slab.h3
-rw-r--r--mm/slab_common.c2
-rw-r--r--mm/slub.c5
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/util.c2
-rw-r--r--net/8021q/vlan_dev.c26
-rw-r--r--net/appletalk/aarp.c15
-rw-r--r--net/appletalk/ddp.c20
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c16
-rw-r--r--net/batman-adv/sysfs.c7
-rw-r--r--net/batman-adv/translation-table.c32
-rw-r--r--net/bridge/br_multicast.c3
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/bridge/br_netfilter_ipv6.c2
-rw-r--r--net/ceph/messenger.c8
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/devlink.c5
-rw-r--r--net/core/ethtool.c46
-rw-r--r--net/core/filter.c43
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/net-sysfs.c20
-rw-r--r--net/core/net_namespace.c1
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/dccp/feat.c7
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dsa/tag_qca.c10
-rw-r--r--net/ipv4/ip_gre.c15
-rw-r--r--net/ipv4/ip_input.c7
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/tcp_dctcp.c36
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_gre.c20
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c6
-rw-r--r--net/ipv6/route.c18
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/tcp_ipv6.c8
-rw-r--r--net/kcm/kcmsock.c16
-rw-r--r--net/mpls/mpls_iptunnel.c12
-rw-r--r--net/ncsi/ncsi-netlink.c4
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/nf_conntrack_sip.c37
-rw-r--r--net/netfilter/nf_tables_api.c5
-rw-r--r--net/netfilter/nft_objref.c19
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c7
-rw-r--r--net/netlink/genetlink.c3
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/nci/hci.c8
-rw-r--r--net/openvswitch/datapath.c12
-rw-r--r--net/openvswitch/flow_netlink.c4
-rw-r--r--net/packet/af_packet.c7
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/rose/rose_subr.c21
-rw-r--r--net/rxrpc/output.c11
-rw-r--r--net/sched/Kconfig3
-rw-r--r--net/sched/act_api.c101
-rw-r--r--net/sched/act_bpf.c25
-rw-r--r--net/sched/act_connmark.c22
-rw-r--r--net/sched/act_csum.c22
-rw-r--r--net/sched/act_gact.c15
-rw-r--r--net/sched/act_ife.c35
-rw-r--r--net/sched/act_ipt.c11
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c15
-rw-r--r--net/sched/act_pedit.c18
-rw-r--r--net/sched/act_police.c13
-rw-r--r--net/sched/act_sample.c31
-rw-r--r--net/sched/act_simple.c54
-rw-r--r--net/sched/act_skbedit.c20
-rw-r--r--net/sched/act_skbmod.c20
-rw-r--r--net/sched/act_tunnel_key.c19
-rw-r--r--net/sched/act_vlan.c22
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/cls_matchall.c5
-rw-r--r--net/sched/sch_cake.c38
-rw-r--r--net/sched/sch_cbq.c10
-rw-r--r--net/sched/sch_drr.c16
-rw-r--r--net/sched/sch_hfsc.c19
-rw-r--r--net/sched/sch_htb.c22
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c3
-rw-r--r--net/sched/sch_multiq.c10
-rw-r--r--net/sched/sch_prio.c10
-rw-r--r--net/sched/sch_qfq.c14
-rw-r--r--net/sched/sch_red.c3
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sched/sch_tbf.c3
-rw-r--r--net/sctp/protocol.c1
-rw-r--r--net/sctp/socket.c54
-rw-r--r--net/socket.c277
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/tipc/group.c3
-rw-r--r--net/tipc/net.c5
-rw-r--r--net/tipc/netlink_compat.c24
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tipc/socket.c22
-rw-r--r--net/tipc/topsrv.c1
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/xdp/xdp_umem.c19
-rw-r--r--scripts/Makefile.build7
-rwxr-xr-xscripts/checkpatch.pl2
-rw-r--r--scripts/coccinelle/api/stream_open.cocci363
-rw-r--r--scripts/coccinelle/free/put_device.cocci1
-rw-r--r--scripts/coccinelle/misc/badty.cocci2
-rw-r--r--scripts/kconfig/lxdialog/inputbox.c3
-rw-r--r--scripts/kconfig/nconf.c2
-rw-r--r--scripts/kconfig/nconf.gui.c3
-rw-r--r--scripts/mod/modpost.c2
-rw-r--r--security/Kconfig38
-rw-r--r--security/keys/trusted.c32
-rw-r--r--security/yama/yama_lsm.c8
-rw-r--r--sound/core/oss/pcm_oss.c43
-rw-r--r--sound/core/pcm_native.c9
-rw-r--r--sound/core/rawmidi.c2
-rw-r--r--sound/core/seq/oss/seq_oss_synth.c7
-rw-r--r--sound/pci/hda/patch_ca0132.c20
-rw-r--r--sound/pci/hda/patch_realtek.c35
-rw-r--r--tools/arch/alpha/include/uapi/asm/mman.h2
-rw-r--r--tools/arch/mips/include/uapi/asm/mman.h2
-rw-r--r--tools/arch/parisc/include/uapi/asm/mman.h2
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/arch/xtensa/include/uapi/asm/mman.h2
-rw-r--r--tools/build/feature/test-libopencsd.c4
-rw-r--r--tools/include/uapi/asm-generic/mman-common-tools.h23
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h4
-rw-r--r--tools/include/uapi/asm-generic/mman.h2
-rw-r--r--tools/include/uapi/asm-generic/unistd.h11
-rw-r--r--tools/include/uapi/drm/i915_drm.h64
-rw-r--r--tools/include/uapi/linux/bpf.h188
-rw-r--r--tools/include/uapi/linux/fcntl.h1
-rw-r--r--tools/include/uapi/linux/mman.h4
-rw-r--r--tools/lib/bpf/Makefile47
-rw-r--r--tools/lib/bpf/README.rst1
-rw-r--r--tools/lib/bpf/btf.c50
-rw-r--r--tools/lib/bpf/libbpf.c13
-rw-r--r--tools/lib/bpf/xsk.c15
-rw-r--r--tools/objtool/Makefile7
-rw-r--r--tools/perf/Makefile.perf4
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl4
-rwxr-xr-xtools/perf/check-headers.sh2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py77
-rwxr-xr-xtools/perf/trace/beauty/mmap_flags.sh14
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c1
-rw-r--r--tools/perf/util/evlist.c29
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c72
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c20
-rw-r--r--tools/perf/util/machine.c32
-rw-r--r--tools/perf/util/pmu.c10
-rw-r--r--tools/power/x86/turbostat/turbostat.c277
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields_kern.c88
-rw-r--r--tools/testing/selftests/bpf/test_btf.c91
-rw-r--r--tools/testing/selftests/bpf/test_sock_fields.c134
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c63
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c168
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c4
-rw-r--r--tools/testing/selftests/kvm/.gitignore7
-rw-r--r--tools/testing/selftests/kvm/Makefile13
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c8
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h5
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h27
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c53
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c20
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c35
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c70
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c157
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c280
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json51
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json49
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json20
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py5
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py63
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/arm/arm.c3
-rw-r--r--virt/kvm/arm/hyp/vgic-v3-sr.c4
-rw-r--r--virt/kvm/arm/mmu.c125
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c31
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c4
-rw-r--r--virt/kvm/arm/vgic/vgic.c14
-rw-r--r--virt/kvm/eventfd.c6
-rw-r--r--virt/kvm/irqchip.c5
-rw-r--r--virt/kvm/kvm_main.c117
923 files changed, 13699 insertions, 10220 deletions
diff --git a/.mailmap b/.mailmap
index 37e1847c7988..ae2bcad06f4b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -156,6 +156,8 @@ Morten Welinder <welinder@darter.rentec.com>
Morten Welinder <welinder@troll.com>
Mythri P K <mythripk@ti.com>
Nguyen Anh Quynh <aquynh@gmail.com>
+Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org>
+Nicolas Pitre <nico@fluxnic.net> <nico@linaro.org>
Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Patrick Mochel <mochel@digitalimplant.org>
Paul Burton <paul.burton@mips.com> <paul.burton@imgtec.com>
@@ -224,3 +226,5 @@ Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yusuke Goda <goda.yusuke@renesas.com>
Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi>
+Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
+Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
index b8ca28b60215..7e71c9c1d8e9 100644
--- a/Documentation/accounting/psi.txt
+++ b/Documentation/accounting/psi.txt
@@ -56,12 +56,12 @@ situation from a state where some tasks are stalled but the CPU is
still doing productive work. As such, time spent in this subset of the
stall state is tracked separately and exported in the "full" averages.
-The ratios are tracked as recent trends over ten, sixty, and three
-hundred second windows, which gives insight into short term events as
-well as medium and long term trends. The total absolute stall time is
-tracked and exported as well, to allow detection of latency spikes
-which wouldn't necessarily make a dent in the time averages, or to
-average trends over custom time frames.
+The ratios (in %) are tracked as recent trends over ten, sixty, and
+three hundred second windows, which gives insight into short term events
+as well as medium and long term trends. The total absolute stall time
+(in us) is tracked and exported as well, to allow detection of latency
+spikes which wouldn't necessarily make a dent in the time averages,
+or to average trends over custom time frames.
Cgroup2 interface
=================
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 9a60a5d60e38..7313d354f20e 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -148,16 +148,16 @@ The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()``
for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values
-for this int. For example, a bitfield struct member has: * btf member bit
-offset 100 from the start of the structure, * btf member pointing to an int
-type, * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
+for this int. For example, a bitfield struct member has:
+ * btf member bit offset 100 from the start of the structure,
+ * btf member pointing to an int type,
+ * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
Then in the struct memory layout, this member will occupy ``4`` bits starting
from bits ``100 + 2 = 102``.
Alternatively, the bitfield struct member can be the following to access the
same bits as the above:
-
* btf member bit offset 102,
* btf member pointing to an int type,
* the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 365dcf384d73..82dd7582e945 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -228,7 +228,7 @@ patternProperties:
- renesas,r9a06g032-smp
- rockchip,rk3036-smp
- rockchip,rk3066-smp
- - socionext,milbeaut-m10v-smp
+ - socionext,milbeaut-m10v-smp
- ste,dbx500-smp
cpu-release-addr:
diff --git a/Documentation/devicetree/bindings/hwmon/adc128d818.txt b/Documentation/devicetree/bindings/hwmon/adc128d818.txt
index 08bab0e94d25..d0ae46d7bac3 100644
--- a/Documentation/devicetree/bindings/hwmon/adc128d818.txt
+++ b/Documentation/devicetree/bindings/hwmon/adc128d818.txt
@@ -26,7 +26,7 @@ Required node properties:
Optional node properties:
- - ti,mode: Operation mode (see above).
+ - ti,mode: Operation mode (u8) (see above).
Example (operation mode 2):
@@ -34,5 +34,5 @@ Example (operation mode 2):
adc128d818@1d {
compatible = "ti,adc128d818";
reg = <0x1d>;
- ti,mode = <2>;
+ ti,mode = /bits/ 8 <2>;
};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-xscale.txt b/Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt
index dcc8390e0d24..dcc8390e0d24 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-xscale.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mtk.txt b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt
index ee4c32454198..ee4c32454198 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mtk.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt b/Documentation/devicetree/bindings/i2c/i2c-stu300.txt
index bd81a482634f..bd81a482634f 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-stu300.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt b/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt
index 49df0053347a..49df0053347a 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt b/Documentation/devicetree/bindings/i2c/i2c-wmt.txt
index 94a425eaa6c7..94a425eaa6c7 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-wmt.txt
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
index bbcb255c3150..93a7469e70d4 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -12,10 +12,15 @@ Required properties:
Subnodes:
The integrated switch subnode should be specified according to the binding
-described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of
-port and PHY id, each subnode describing a port needs to have a valid phandle
-referencing the internal PHY connected to it. The CPU port of this switch is
-always port 0.
+described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external
+mdio-bus each subnode describing a port needs to have a valid phandle
+referencing the internal PHY it is connected to. This is because there's no
+N:N mapping of port and PHY id.
+
+Don't use mixed external and internal mdio-bus configurations, as this is
+not supported by the hardware.
+
+The CPU port of this switch is always port 0.
A CPU port node has the following optional node:
@@ -31,8 +36,9 @@ For QCA8K the 'fixed-link' sub-node supports only the following properties:
- 'full-duplex' (boolean, optional), to indicate that full duplex is
used. When absent, half duplex is assumed.
-Example:
+Examples:
+for the external mdio-bus configuration:
&mdio0 {
phy_port1: phy@0 {
@@ -55,12 +61,12 @@ Example:
reg = <4>;
};
- switch0@0 {
+ switch@10 {
compatible = "qca,qca8337";
#address-cells = <1>;
#size-cells = <0>;
- reg = <0>;
+ reg = <0x10>;
ports {
#address-cells = <1>;
@@ -108,3 +114,56 @@ Example:
};
};
};
+
+for the internal master mdio-bus configuration:
+
+ &mdio0 {
+ switch@10 {
+ compatible = "qca,qca8337";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ reg = <0x10>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "cpu";
+ ethernet = <&gmac1>;
+ phy-mode = "rgmii";
+ fixed-link {
+ speed = 1000;
+ full-duplex;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "lan4";
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "wan";
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt b/Documentation/devicetree/bindings/serial/mtk-uart.txt
index 742cb470595b..bcfb13194f16 100644
--- a/Documentation/devicetree/bindings/serial/mtk-uart.txt
+++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt
@@ -16,6 +16,7 @@ Required properties:
* "mediatek,mt8127-uart" for MT8127 compatible UARTS
* "mediatek,mt8135-uart" for MT8135 compatible UARTS
* "mediatek,mt8173-uart" for MT8173 compatible UARTS
+ * "mediatek,mt8183-uart", "mediatek,mt6577-uart" for MT8183 compatible UARTS
* "mediatek,mt6577-uart" for MT6577 and all of the above
- reg: The base address of the UART register bank.
diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.txt
index 944d1965e917..00ff0cfccfa7 100644
--- a/Documentation/filesystems/mount_api.txt
+++ b/Documentation/filesystems/mount_api.txt
@@ -12,11 +12,13 @@ CONTENTS
(4) Filesystem context security.
- (5) VFS filesystem context operations.
+ (5) VFS filesystem context API.
- (6) Parameter description.
+ (6) Superblock creation helpers.
- (7) Parameter helper functions.
+ (7) Parameter description.
+
+ (8) Parameter helper functions.
========
@@ -41,12 +43,15 @@ The creation of new mounts is now to be done in a multistep process:
(7) Destroy the context.
-To support this, the file_system_type struct gains a new field:
+To support this, the file_system_type struct gains two new fields:
int (*init_fs_context)(struct fs_context *fc);
+ const struct fs_parameter_description *parameters;
-which is invoked to set up the filesystem-specific parts of a filesystem
-context, including the additional space.
+The first is invoked to set up the filesystem-specific parts of a filesystem
+context, including the additional space, and the second points to the
+parameter description for validation at registration time and querying by a
+future system call.
Note that security initialisation is done *after* the filesystem is called so
that the namespaces may be adjusted first.
@@ -73,9 +78,9 @@ context. This is represented by the fs_context structure:
void *s_fs_info;
unsigned int sb_flags;
unsigned int sb_flags_mask;
+ unsigned int s_iflags;
+ unsigned int lsm_flags;
enum fs_context_purpose purpose:8;
- bool sloppy:1;
- bool silent:1;
...
};
@@ -141,6 +146,10 @@ The fs_context fields are as follows:
Which bits SB_* flags are to be set/cleared in super_block::s_flags.
+ (*) unsigned int s_iflags
+
+ These will be bitwise-OR'd with s->s_iflags when a superblock is created.
+
(*) enum fs_context_purpose
This indicates the purpose for which the context is intended. The
@@ -150,17 +159,6 @@ The fs_context fields are as follows:
FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount
FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount
- (*) bool sloppy
- (*) bool silent
-
- These are set if the sloppy or silent mount options are given.
-
- [NOTE] sloppy is probably unnecessary when userspace passes over one
- option at a time since the error can just be ignored if userspace deems it
- to be unimportant.
-
- [NOTE] silent is probably redundant with sb_flags & SB_SILENT.
-
The mount context is created by calling vfs_new_fs_context() or
vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the
structure is not refcounted.
@@ -342,28 +340,47 @@ number of operations used by the new mount code for this purpose:
It should return 0 on success or a negative error code on failure.
-=================================
-VFS FILESYSTEM CONTEXT OPERATIONS
-=================================
+==========================
+VFS FILESYSTEM CONTEXT API
+==========================
-There are four operations for creating a filesystem context and
-one for destroying a context:
+There are four operations for creating a filesystem context and one for
+destroying a context:
- (*) struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
- struct dentry *reference,
- unsigned int sb_flags,
- unsigned int sb_flags_mask,
- enum fs_context_purpose purpose);
+ (*) struct fs_context *fs_context_for_mount(
+ struct file_system_type *fs_type,
+ unsigned int sb_flags);
- Create a filesystem context for a given filesystem type and purpose. This
- allocates the filesystem context, sets the superblock flags, initialises
- the security and calls fs_type->init_fs_context() to initialise the
- filesystem private data.
+ Allocate a filesystem context for the purpose of setting up a new mount,
+ whether that be with a new superblock or sharing an existing one. This
+ sets the superblock flags, initialises the security and calls
+ fs_type->init_fs_context() to initialise the filesystem private data.
- reference can be NULL or it may indicate the root dentry of a superblock
- that is going to be reconfigured (FS_CONTEXT_FOR_RECONFIGURE) or
- the automount point that triggered a submount (FS_CONTEXT_FOR_SUBMOUNT).
- This is provided as a source of namespace information.
+ fs_type specifies the filesystem type that will manage the context and
+ sb_flags presets the superblock flags stored therein.
+
+ (*) struct fs_context *fs_context_for_reconfigure(
+ struct dentry *dentry,
+ unsigned int sb_flags,
+ unsigned int sb_flags_mask);
+
+ Allocate a filesystem context for the purpose of reconfiguring an
+ existing superblock. dentry provides a reference to the superblock to be
+ configured. sb_flags and sb_flags_mask indicate which superblock flags
+ need changing and to what.
+
+ (*) struct fs_context *fs_context_for_submount(
+ struct file_system_type *fs_type,
+ struct dentry *reference);
+
+ Allocate a filesystem context for the purpose of creating a new mount for
+ an automount point or other derived superblock. fs_type specifies the
+ filesystem type that will manage the context and the reference dentry
+ supplies the parameters. Namespaces are propagated from the reference
+ dentry's superblock also.
+
+ Note that it's not a requirement that the reference dentry be of the same
+ filesystem type as fs_type.
(*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc);
@@ -390,20 +407,6 @@ context pointer or a negative error code.
For the remaining operations, if an error occurs, a negative error code will be
returned.
- (*) int vfs_get_tree(struct fs_context *fc);
-
- Get or create the mountable root and superblock, using the parameters in
- the filesystem context to select/configure the superblock. This invokes
- the ->validate() op and then the ->get_tree() op.
-
- [NOTE] ->validate() could perhaps be rolled into ->get_tree() and
- ->reconfigure().
-
- (*) struct vfsmount *vfs_create_mount(struct fs_context *fc);
-
- Create a mount given the parameters in the specified filesystem context.
- Note that this does not attach the mount to anything.
-
(*) int vfs_parse_fs_param(struct fs_context *fc,
struct fs_parameter *param);
@@ -432,17 +435,80 @@ returned.
clear the pointer, but then becomes responsible for disposing of the
object.
- (*) int vfs_parse_fs_string(struct fs_context *fc, char *key,
+ (*) int vfs_parse_fs_string(struct fs_context *fc, const char *key,
const char *value, size_t v_size);
- A wrapper around vfs_parse_fs_param() that just passes a constant string.
+ A wrapper around vfs_parse_fs_param() that copies the value string it is
+ passed.
(*) int generic_parse_monolithic(struct fs_context *fc, void *data);
Parse a sys_mount() data page, assuming the form to be a text list
consisting of key[=val] options separated by commas. Each item in the
list is passed to vfs_mount_option(). This is the default when the
- ->parse_monolithic() operation is NULL.
+ ->parse_monolithic() method is NULL.
+
+ (*) int vfs_get_tree(struct fs_context *fc);
+
+ Get or create the mountable root and superblock, using the parameters in
+ the filesystem context to select/configure the superblock. This invokes
+ the ->get_tree() method.
+
+ (*) struct vfsmount *vfs_create_mount(struct fs_context *fc);
+
+ Create a mount given the parameters in the specified filesystem context.
+ Note that this does not attach the mount to anything.
+
+
+===========================
+SUPERBLOCK CREATION HELPERS
+===========================
+
+A number of VFS helpers are available for use by filesystems for the creation
+or looking up of superblocks.
+
+ (*) struct super_block *
+ sget_fc(struct fs_context *fc,
+ int (*test)(struct super_block *sb, struct fs_context *fc),
+ int (*set)(struct super_block *sb, struct fs_context *fc));
+
+ This is the core routine. If test is non-NULL, it searches for an
+ existing superblock matching the criteria held in the fs_context, using
+ the test function to match them. If no match is found, a new superblock
+ is created and the set function is called to set it up.
+
+ Prior to the set function being called, fc->s_fs_info will be transferred
+ to sb->s_fs_info - and fc->s_fs_info will be cleared if set returns
+ success (ie. 0).
+
+The following helpers all wrap sget_fc():
+
+ (*) int vfs_get_super(struct fs_context *fc,
+ enum vfs_get_super_keying keying,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc))
+
+ This creates/looks up a deviceless superblock. The keying indicates how
+ many superblocks of this type may exist and in what manner they may be
+ shared:
+
+ (1) vfs_get_single_super
+
+ Only one such superblock may exist in the system. Any further
+ attempt to get a new superblock gets this one (and any parameter
+ differences are ignored).
+
+ (2) vfs_get_keyed_super
+
+ Multiple superblocks of this type may exist and they're keyed on
+ their s_fs_info pointer (for example this may refer to a
+ namespace).
+
+ (3) vfs_get_independent_super
+
+ Multiple independent superblocks of this type may exist. This
+ function never matches an existing one and always creates a new
+ one.
=====================
@@ -454,35 +520,22 @@ There's a core description struct that links everything together:
struct fs_parameter_description {
const char name[16];
- u8 nr_params;
- u8 nr_alt_keys;
- u8 nr_enums;
- bool ignore_unknown;
- bool no_source;
- const char *const *keys;
- const struct constant_table *alt_keys;
const struct fs_parameter_spec *specs;
const struct fs_parameter_enum *enums;
};
For example:
- enum afs_param {
+ enum {
Opt_autocell,
Opt_bar,
Opt_dyn,
Opt_foo,
Opt_source,
- nr__afs_params
};
static const struct fs_parameter_description afs_fs_parameters = {
.name = "kAFS",
- .nr_params = nr__afs_params,
- .nr_alt_keys = ARRAY_SIZE(afs_param_alt_keys),
- .nr_enums = ARRAY_SIZE(afs_param_enums),
- .keys = afs_param_keys,
- .alt_keys = afs_param_alt_keys,
.specs = afs_param_specs,
.enums = afs_param_enums,
};
@@ -494,28 +547,24 @@ The members are as follows:
The name to be used in error messages generated by the parse helper
functions.
- (2) u8 nr_params;
-
- The number of discrete parameter identifiers. This indicates the number
- of elements in the ->types[] array and also limits the values that may be
- used in the values that the ->keys[] array maps to.
-
- It is expected that, for example, two parameters that are related, say
- "acl" and "noacl" with have the same ID, but will be flagged to indicate
- that one is the inverse of the other. The value can then be picked out
- from the parse result.
+ (2) const struct fs_parameter_specification *specs;
- (3) const struct fs_parameter_specification *specs;
+ Table of parameter specifications, terminated with a null entry, where the
+ entries are of type:
- Table of parameter specifications, where the entries are of type:
-
- struct fs_parameter_type {
- enum fs_parameter_spec type:8;
- u8 flags;
+ struct fs_parameter_spec {
+ const char *name;
+ u8 opt;
+ enum fs_parameter_type type:8;
+ unsigned short flags;
};
- and the parameter identifier is the index to the array. 'type' indicates
- the desired value type and must be one of:
+ The 'name' field is a string to match exactly to the parameter key (no
+ wildcards, patterns and no case-independence) and 'opt' is the value that
+ will be returned by the fs_parser() function in the case of a successful
+ match.
+
+ The 'type' field indicates the desired value type and must be one of:
TYPE NAME EXPECTED VALUE RESULT IN
======================= ======================= =====================
@@ -525,85 +574,65 @@ The members are as follows:
fs_param_is_u32_octal 32-bit octal int result->uint_32
fs_param_is_u32_hex 32-bit hex int result->uint_32
fs_param_is_s32 32-bit signed int result->int_32
+ fs_param_is_u64 64-bit unsigned int result->uint_64
fs_param_is_enum Enum value name result->uint_32
fs_param_is_string Arbitrary string param->string
fs_param_is_blob Binary blob param->blob
fs_param_is_blockdev Blockdev path * Needs lookup
fs_param_is_path Path * Needs lookup
- fs_param_is_fd File descriptor param->file
-
- And each parameter can be qualified with 'flags':
-
- fs_param_v_optional The value is optional
- fs_param_neg_with_no If key name is prefixed with "no", it is false
- fs_param_neg_with_empty If value is "", it is false
- fs_param_deprecated The parameter is deprecated.
-
- For example:
-
- static const struct fs_parameter_spec afs_param_specs[nr__afs_params] = {
- [Opt_autocell] = { fs_param_is flag },
- [Opt_bar] = { fs_param_is_enum },
- [Opt_dyn] = { fs_param_is flag },
- [Opt_foo] = { fs_param_is_bool, fs_param_neg_with_no },
- [Opt_source] = { fs_param_is_string },
- };
+ fs_param_is_fd File descriptor result->int_32
Note that if the value is of fs_param_is_bool type, fs_parse() will try
to match any string value against "0", "1", "no", "yes", "false", "true".
- [!] NOTE that the table must be sorted according to primary key name so
- that ->keys[] is also sorted.
-
- (4) const char *const *keys;
-
- Table of primary key names for the parameters. There must be one entry
- per defined parameter. The table is optional if ->nr_params is 0. The
- table is just an array of names e.g.:
+ Each parameter can also be qualified with 'flags':
- static const char *const afs_param_keys[nr__afs_params] = {
- [Opt_autocell] = "autocell",
- [Opt_bar] = "bar",
- [Opt_dyn] = "dyn",
- [Opt_foo] = "foo",
- [Opt_source] = "source",
- };
-
- [!] NOTE that the table must be sorted such that the table can be searched
- with bsearch() using strcmp(). This means that the Opt_* values must
- correspond to the entries in this table.
-
- (5) const struct constant_table *alt_keys;
- u8 nr_alt_keys;
-
- Table of additional key names and their mappings to parameter ID plus the
- number of elements in the table. This is optional. The table is just an
- array of { name, integer } pairs, e.g.:
+ fs_param_v_optional The value is optional
+ fs_param_neg_with_no result->negated set if key is prefixed with "no"
+ fs_param_neg_with_empty result->negated set if value is ""
+ fs_param_deprecated The parameter is deprecated.
- static const struct constant_table afs_param_keys[] = {
- { "baz", Opt_bar },
- { "dynamic", Opt_dyn },
+ These are wrapped with a number of convenience wrappers:
+
+ MACRO SPECIFIES
+ ======================= ===============================================
+ fsparam_flag() fs_param_is_flag
+ fsparam_flag_no() fs_param_is_flag, fs_param_neg_with_no
+ fsparam_bool() fs_param_is_bool
+ fsparam_u32() fs_param_is_u32
+ fsparam_u32oct() fs_param_is_u32_octal
+ fsparam_u32hex() fs_param_is_u32_hex
+ fsparam_s32() fs_param_is_s32
+ fsparam_u64() fs_param_is_u64
+ fsparam_enum() fs_param_is_enum
+ fsparam_string() fs_param_is_string
+ fsparam_blob() fs_param_is_blob
+ fsparam_bdev() fs_param_is_blockdev
+ fsparam_path() fs_param_is_path
+ fsparam_fd() fs_param_is_fd
+
+ all of which take two arguments, name string and option number - for
+ example:
+
+ static const struct fs_parameter_spec afs_param_specs[] = {
+ fsparam_flag ("autocell", Opt_autocell),
+ fsparam_flag ("dyn", Opt_dyn),
+ fsparam_string ("source", Opt_source),
+ fsparam_flag_no ("foo", Opt_foo),
+ {}
};
- [!] NOTE that the table must be sorted such that strcmp() can be used with
- bsearch() to search the entries.
-
- The parameter ID can also be fs_param_key_removed to indicate that a
- deprecated parameter has been removed and that an error will be given.
- This differs from fs_param_deprecated where the parameter may still have
- an effect.
-
- Further, the behaviour of the parameter may differ when an alternate name
- is used (for instance with NFS, "v3", "v4.2", etc. are alternate names).
+ An addition macro, __fsparam() is provided that takes an additional pair
+ of arguments to specify the type and the flags for anything that doesn't
+ match one of the above macros.
(6) const struct fs_parameter_enum *enums;
- u8 nr_enums;
- Table of enum value names to integer mappings and the number of elements
- stored therein. This is of type:
+ Table of enum value names to integer mappings, terminated with a null
+ entry. This is of type:
struct fs_parameter_enum {
- u8 param_id;
+ u8 opt;
char name[14];
u8 value;
};
@@ -621,11 +650,6 @@ The members are as follows:
try to look the value up in the enum table and the result will be stored
in the parse result.
- (7) bool no_source;
-
- If this is set, fs_parse() will ignore any "source" parameter and not
- pass it to the filesystem.
-
The parser should be pointed to by the parser pointer in the file_system_type
struct as this will provide validation on registration (if
CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from
@@ -650,9 +674,8 @@ process the parameters it is given.
int value;
};
- and it must be sorted such that it can be searched using bsearch() using
- strcmp(). If a match is found, the corresponding value is returned. If a
- match isn't found, the not_found value is returned instead.
+ If a match is found, the corresponding value is returned. If a match
+ isn't found, the not_found value is returned instead.
(*) bool validate_constant_table(const struct constant_table *tbl,
size_t tbl_size,
@@ -665,36 +688,36 @@ process the parameters it is given.
should just be set to lie inside the low-to-high range.
If all is good, true is returned. If the table is invalid, errors are
- logged to dmesg, the stack is dumped and false is returned.
+ logged to dmesg and false is returned.
+
+ (*) bool fs_validate_description(const struct fs_parameter_description *desc);
+
+ This performs some validation checks on a parameter description. It
+ returns true if the description is good and false if it is not. It will
+ log errors to dmesg if validation fails.
(*) int fs_parse(struct fs_context *fc,
- const struct fs_param_parser *parser,
+ const struct fs_parameter_description *desc,
struct fs_parameter *param,
- struct fs_param_parse_result *result);
+ struct fs_parse_result *result);
This is the main interpreter of parameters. It uses the parameter
- description (parser) to look up the name of the parameter to use and to
- convert that to a parameter ID (stored in result->key).
+ description to look up a parameter by key name and to convert that to an
+ option number (which it returns).
If successful, and if the parameter type indicates the result is a
boolean, integer or enum type, the value is converted by this function and
- the result stored in result->{boolean,int_32,uint_32}.
+ the result stored in result->{boolean,int_32,uint_32,uint_64}.
If a match isn't initially made, the key is prefixed with "no" and no
value is present then an attempt will be made to look up the key with the
prefix removed. If this matches a parameter for which the type has flag
- fs_param_neg_with_no set, then a match will be made and the value will be
- set to false/0/NULL.
-
- If the parameter is successfully matched and, optionally, parsed
- correctly, 1 is returned. If the parameter isn't matched and
- parser->ignore_unknown is set, then 0 is returned. Otherwise -EINVAL is
- returned.
-
- (*) bool fs_validate_description(const struct fs_parameter_description *desc);
+ fs_param_neg_with_no set, then a match will be made and result->negated
+ will be set to true.
- This is validates the parameter description. It returns true if the
- description is good and false if it is not.
+ If the parameter isn't matched, -ENOPARAM will be returned; if the
+ parameter is matched, but the value is erroneous, -EINVAL will be
+ returned; otherwise the parameter's option number will be returned.
(*) int fs_lookup_param(struct fs_context *fc,
struct fs_parameter *value,
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index d1ee484a787d..ee9984f35868 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -36,6 +36,7 @@ Supported adapters:
* Intel Cannon Lake (PCH)
* Intel Cedar Fork (PCH)
* Intel Ice Lake (PCH)
+ * Intel Comet Lake (PCH)
Datasheets: Publicly available at the Intel website
On Intel Patsburg and later chipsets, both the normal host SMBus controller
diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
index f79934225d8d..ca983328976b 100644
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@@ -102,9 +102,11 @@ Byte sequences
dictionary which is empty, and that it will always be
invalid at this place.
- 17 : bitstream version. If the first byte is 17, the next byte
- gives the bitstream version (version 1 only). If the first byte
- is not 17, the bitstream version is 0.
+ 17 : bitstream version. If the first byte is 17, and compressed
+ stream length is at least 5 bytes (length of shortest possible
+ versioned bitstream), the next byte gives the bitstream version
+ (version 1 only).
+ Otherwise, the bitstream version is 0.
18..21 : copy 0..3 literals
state = (byte - 17) = 0..3 [ copy <state> literals ]
diff --git a/Documentation/networking/bpf_flow_dissector.rst b/Documentation/networking/bpf_flow_dissector.rst
new file mode 100644
index 000000000000..b375ae2ec2c4
--- /dev/null
+++ b/Documentation/networking/bpf_flow_dissector.rst
@@ -0,0 +1,126 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+BPF Flow Dissector
+==================
+
+Overview
+========
+
+Flow dissector is a routine that parses metadata out of the packets. It's
+used in the various places in the networking subsystem (RFS, flow hash, etc).
+
+BPF flow dissector is an attempt to reimplement C-based flow dissector logic
+in BPF to gain all the benefits of BPF verifier (namely, limits on the
+number of instructions and tail calls).
+
+API
+===
+
+BPF flow dissector programs operate on an ``__sk_buff``. However, only the
+limited set of fields is allowed: ``data``, ``data_end`` and ``flow_keys``.
+``flow_keys`` is ``struct bpf_flow_keys`` and contains flow dissector input
+and output arguments.
+
+The inputs are:
+ * ``nhoff`` - initial offset of the networking header
+ * ``thoff`` - initial offset of the transport header, initialized to nhoff
+ * ``n_proto`` - L3 protocol type, parsed out of L2 header
+
+Flow dissector BPF program should fill out the rest of the ``struct
+bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be
+also adjusted accordingly.
+
+The return code of the BPF program is either BPF_OK to indicate successful
+dissection, or BPF_DROP to indicate parsing error.
+
+__sk_buff->data
+===============
+
+In the VLAN-less case, this is what the initial state of the BPF flow
+dissector looks like::
+
+ +------+------+------------+-----------+
+ | DMAC | SMAC | ETHER_TYPE | L3_HEADER |
+ +------+------+------------+-----------+
+ ^
+ |
+ +-- flow dissector starts here
+
+
+.. code:: c
+
+ skb->data + flow_keys->nhoff point to the first byte of L3_HEADER
+ flow_keys->thoff = nhoff
+ flow_keys->n_proto = ETHER_TYPE
+
+In case of VLAN, flow dissector can be called with the two different states.
+
+Pre-VLAN parsing::
+
+ +------+------+------+-----+-----------+-----------+
+ | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
+ +------+------+------+-----+-----------+-----------+
+ ^
+ |
+ +-- flow dissector starts here
+
+.. code:: c
+
+ skb->data + flow_keys->nhoff point the to first byte of TCI
+ flow_keys->thoff = nhoff
+ flow_keys->n_proto = TPID
+
+Please note that TPID can be 802.1AD and, hence, BPF program would
+have to parse VLAN information twice for double tagged packets.
+
+
+Post-VLAN parsing::
+
+ +------+------+------+-----+-----------+-----------+
+ | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
+ +------+------+------+-----+-----------+-----------+
+ ^
+ |
+ +-- flow dissector starts here
+
+.. code:: c
+
+ skb->data + flow_keys->nhoff point the to first byte of L3_HEADER
+ flow_keys->thoff = nhoff
+ flow_keys->n_proto = ETHER_TYPE
+
+In this case VLAN information has been processed before the flow dissector
+and BPF flow dissector is not required to handle it.
+
+
+The takeaway here is as follows: BPF flow dissector program can be called with
+the optional VLAN header and should gracefully handle both cases: when single
+or double VLAN is present and when it is not present. The same program
+can be called for both cases and would have to be written carefully to
+handle both cases.
+
+
+Reference Implementation
+========================
+
+See ``tools/testing/selftests/bpf/progs/bpf_flow.c`` for the reference
+implementation and ``tools/testing/selftests/bpf/flow_dissector_load.[hc]``
+for the loader. bpftool can be used to load BPF flow dissector program as well.
+
+The reference implementation is organized as follows:
+ * ``jmp_table`` map that contains sub-programs for each supported L3 protocol
+ * ``_dissect`` routine - entry point; it does input ``n_proto`` parsing and
+ does ``bpf_tail_call`` to the appropriate L3 handler
+
+Since BPF at this point doesn't support looping (or any jumping back),
+jmp_table is used instead to handle multiple levels of encapsulation (and
+IPv6 options).
+
+
+Current Limitations
+===================
+BPF flow dissector doesn't support exporting all the metadata that in-kernel
+C-based implementation can export. Notable example is single VLAN (802.1Q)
+and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
+for a set of information that's currently can be exported from the BPF context.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5449149be496..984e68f9e026 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -9,6 +9,7 @@ Contents:
netdev-FAQ
af_xdp
batman-adv
+ bpf_flow_dissector
can
can_ucan_protocol
device_drivers/freescale/dpaa2/index
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 18c1415e7bfa..ace56204dd03 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -50,7 +50,7 @@ the excellent reporting over at LWN.net or read the original code.
patchset
[PATCH net-next v4 0/9] socket sendmsg MSG_ZEROCOPY
- http://lkml.kernel.org/r/20170803202945.70750-1-willemdebruijn.kernel@gmail.com
+ https://lkml.kernel.org/netdev/20170803202945.70750-1-willemdebruijn.kernel@gmail.com
Interface
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 0ac5fa77f501..8c7a713cf657 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -131,6 +131,19 @@ it to the maintainer to figure out what is the most recent and current
version that should be applied. If there is any doubt, the maintainer
will reply and ask what should be done.
+Q: I made changes to only a few patches in a patch series should I resend only those changed?
+--------------------------------------------------------------------------------------------
+A: No, please resend the entire patch series and make sure you do number your
+patches such that it is clear this is the latest and greatest set of patches
+that can be applied.
+
+Q: I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do?
+-------------------------------------------------------------------------------------------------------------------------------------------
+A: There is no revert possible, once it is pushed out, it stays like that.
+Please send incremental versions on top of what has been merged in order to fix
+the patches the way they would look like if your latest patch series was to be
+merged.
+
Q: How can I tell what patches are queued up for backporting to the various stable releases?
--------------------------------------------------------------------------------------------
A: Normally Greg Kroah-Hartman collects stable commits himself, but for
diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt
index 54128c50d508..ca2136c76042 100644
--- a/Documentation/networking/nf_flowtable.txt
+++ b/Documentation/networking/nf_flowtable.txt
@@ -44,10 +44,10 @@ including the Netfilter hooks and the flowtable fastpath bypass.
/ \ / \ |Routing | / \
--> ingress ---> prerouting ---> |decision| | postrouting |--> neigh_xmit
\_________/ \__________/ ---------- \____________/ ^
- | ^ | | ^ |
- flowtable | | ____\/___ | |
- | | | / \ | |
- __\/___ | --------->| forward |------------ |
+ | ^ | ^ |
+ flowtable | ____\/___ | |
+ | | / \ | |
+ __\/___ | | forward |------------ |
|-----| | \_________/ |
|-----| | 'flow offload' rule |
|-----| | adds entry to |
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index 52b026be028f..38a4edc4522b 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -413,7 +413,7 @@ algorithm.
.. _F-RTO: https://tools.ietf.org/html/rfc5682
TCP Fast Path
-============
+=============
When kernel receives a TCP packet, it has two paths to handler the
packet, one is fast path, another is slow path. The comment in kernel
code provides a good explanation of them, I pasted them below::
@@ -681,6 +681,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a
DSACK to the sender.
* TcpExtTCPDSACKRecv
+
The TCP stack receives a DSACK, which indicates an acknowledged
duplicate packet is received.
@@ -690,7 +691,7 @@ The TCP stack receives a DSACK, which indicate an out of order
duplicate packet is received.
invalid SACK and DSACK
-====================
+======================
When a SACK (or DSACK) block is invalid, a corresponding counter would
be updated. The validation method is base on the start/end sequence
number of the SACK block. For more details, please refer the comment
@@ -704,11 +705,13 @@ explaination:
.. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32
* TcpExtTCPSACKDiscard
+
This counter indicates how many SACK blocks are invalid. If the invalid
SACK block is caused by ACK recording, the TCP stack will only ignore
it and won't update this counter.
* TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo
+
When a DSACK block is invalid, one of these two counters would be
updated. Which counter will be updated depends on the undo_marker flag
of the TCP socket. If the undo_marker is not set, the TCP stack isn't
@@ -719,7 +722,7 @@ will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld
will be updated. As implied in its name, it might be an old packet.
SACK shift
-=========
+==========
The linux networking stack stores data in sk_buff struct (skb for
short). If a SACK block acrosses multiple skb, the TCP stack will try
to re-arrange data in these skb. E.g. if a SACK block acknowledges seq
@@ -730,12 +733,15 @@ seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be
discard, this operation is 'merge'.
* TcpExtTCPSackShifted
+
A skb is shifted
* TcpExtTCPSackMerged
+
A skb is merged
* TcpExtTCPSackShiftFallback
+
A skb should be shifted or merged, but the TCP stack doesn't do it for
some reasons.
diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt
index 2feaa6619658..bdec03650941 100644
--- a/Documentation/powerpc/DAWR-POWER9.txt
+++ b/Documentation/powerpc/DAWR-POWER9.txt
@@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will not be
trapped in GDB. The watchpoint is remembered, so if the guest is
migrated back to the POWER8 host, it will start working again.
+Force enabling the DAWR
+=============================
+Kernels (since ~v5.2) have an option to force enable the DAWR via:
+
+ echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR. This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+ tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+Any errors/failures/skips mean something is wrong.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index fac1887f25b5..73a501eb9291 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -5,25 +5,32 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation
----------------------
The kvm API is a set of ioctls that are issued to control various aspects
-of a virtual machine. The ioctls belong to three classes
+of a virtual machine. The ioctls belong to three classes:
- System ioctls: These query and set global attributes which affect the
whole kvm subsystem. In addition a system ioctl is used to create
- virtual machines
+ virtual machines.
- VM ioctls: These query and set attributes that affect an entire virtual
machine, for example memory layout. In addition a VM ioctl is used to
- create virtual cpus (vcpus).
+ create virtual cpus (vcpus) and devices.
- Only run VM ioctls from the same process (address space) that was used
- to create the VM.
+ VM ioctls must be issued from the same process (address space) that was
+ used to create the VM.
- vcpu ioctls: These query and set attributes that control the operation
of a single virtual cpu.
- Only run vcpu ioctls from the same thread that was used to create the
- vcpu.
+ vcpu ioctls should be issued from the same thread that was used to create
+ the vcpu, except for asynchronous vcpu ioctl that are marked as such in
+ the documentation. Otherwise, the first ioctl after switching threads
+ could see a performance impact.
+ - device ioctls: These query and set attributes that control the operation
+ of a single device.
+
+ device ioctls must be issued from the same process (address space) that
+ was used to create the VM.
2. File descriptors
-------------------
@@ -32,18 +39,18 @@ The kvm API is centered around file descriptors. An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
handle will create a VM file descriptor which can be used to issue VM
-ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
-and return a file descriptor pointing to it. Finally, ioctls on a vcpu
-fd can be used to control the vcpu, including the important task of
-actually running guest code.
+ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will
+create a virtual cpu or device and return a file descriptor pointing to
+the new resource. Finally, ioctls on a vcpu or device fd can be used
+to control the vcpu or device. For vcpus, this includes the important
+task of actually running guest code.
In general file descriptors can be migrated among processes by means
of fork() and the SCM_RIGHTS facility of unix domain socket. These
kinds of tricks are explicitly not supported by kvm. While they will
not cause harm to the host, their actual behavior is not guaranteed by
-the API. The only supported use is one virtual machine per process,
-and one vcpu per thread.
-
+the API. See "General description" for details on the ioctl usage
+model that is supported by KVM.
It is important to note that althought VM ioctls may only be issued from
the process that created the VM, a VM's lifecycle is associated with its
@@ -323,7 +330,7 @@ They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
The bits in the dirty bitmap are cleared before the ioctl returns, unless
-KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
see the description of the capability.
4.9 KVM_SET_MEMORY_ALIAS
@@ -515,11 +522,15 @@ c) KVM_INTERRUPT_SET_LEVEL
Note that any value for 'irq' other than the ones stated above is invalid
and incurs unexpected behavior.
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
MIPS:
Queues an external interrupt to be injected into the virtual CPU. A negative
interrupt number dequeues the interrupt.
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
4.17 KVM_DEBUG_GUEST
@@ -1086,14 +1097,11 @@ struct kvm_userspace_memory_region {
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
-This ioctl allows the user to create or modify a guest physical memory
-slot. When changing an existing slot, it may be moved in the guest
-physical memory space, or its flags may be modified. It may not be
-resized. Slots may not overlap in guest physical address space.
-Bits 0-15 of "slot" specifies the slot id and this value should be
-less than the maximum number of user memory slots supported per VM.
-The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
-if this capability is supported by the architecture.
+This ioctl allows the user to create, modify or delete a guest physical
+memory slot. Bits 0-15 of "slot" specify the slot id and this value
+should be less than the maximum number of user memory slots supported per
+VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS.
+Slots may not overlap in guest physical address space.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
specifies the address space which is being modified. They must be
@@ -1102,6 +1110,10 @@ KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces
are unrelated; the restriction on overlapping slots only applies within
each address space.
+Deleting a slot is done by passing zero for memory_size. When changing
+an existing slot, it may be moved in the guest physical memory space,
+or its flags may be modified, but it may not be resized.
+
Memory for the region is taken starting at the address denoted by the
field userspace_addr, which must point at user addressable memory for
the entire memory slot size. Any object may back this memory, including
@@ -1961,6 +1973,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_TLB3PS | 32
PPC | KVM_REG_PPC_EPTCFG | 32
PPC | KVM_REG_PPC_ICP_STATE | 64
+ PPC | KVM_REG_PPC_VP_STATE | 128
PPC | KVM_REG_PPC_TB_OFFSET | 64
PPC | KVM_REG_PPC_SPMC1 | 32
PPC | KVM_REG_PPC_SPMC2 | 32
@@ -2594,7 +2607,7 @@ KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
machine checks needing further payload are not
supported by this ioctl)
-Note that the vcpu ioctl is asynchronous to vcpu execution.
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
4.78 KVM_PPC_GET_HTAB_FD
@@ -3186,8 +3199,7 @@ KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
KVM_S390_MCHK - machine check interrupt; parameters in .mchk
-
-Note that the vcpu ioctl is asynchronous to vcpu execution.
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
4.94 KVM_S390_GET_IRQ_STATE
@@ -3924,7 +3936,7 @@ to I/O ports.
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
-Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_dirty_log (in)
@@ -3945,8 +3957,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to
the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
field. Bit 0 of the bitmap corresponds to page "first_page" in the
memory slot, and num_pages is the size in bits of the input bitmap.
-Both first_page and num_pages must be a multiple of 64. For each bit
-that is set in the input bitmap, the corresponding page is marked "clean"
+first_page must be a multiple of 64; num_pages must also be a multiple of
+64 unless first_page + num_pages is the size of the memory slot. For each
+bit that is set in the input bitmap, the corresponding page is marked "clean"
in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
(for example via write-protection, or by clearing the dirty bit in
a page table entry).
@@ -3956,10 +3969,10 @@ the address space for which you want to return the dirty bitmap.
They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
-This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
is enabled; for more information, see the description of the capability.
However, it can always be used as long as KVM_CHECK_EXTENSION confirms
-that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
+that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
4.118 KVM_GET_SUPPORTED_HV_CPUID
@@ -4653,6 +4666,15 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
+6.75 KVM_CAP_PPC_IRQ_XIVE
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XIVE device fd
+ args[1] is the XIVE CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XIVE device.
+
7. Capabilities that can be enabled on VMs
------------------------------------------
@@ -4946,7 +4968,7 @@ and injected exceptions.
* For the new DR6 bits, note that bit 16 is set iff the #DB exception
will clear DR6.RTM.
-7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
Architectures: all
Parameters: args[0] whether feature should be enabled or not
@@ -4969,6 +4991,11 @@ while userspace can see false reports of dirty pages. Manual reprotection
helps reducing this time, improving guest performance and reducing the
number of dirty log false positives.
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
+it hard or impossible to use it correctly. The availability of
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed.
+Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT.
8. Other capabilities.
----------------------
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 95ca68d663a4..4ffb82b02468 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -141,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc {
u8 pcc[16]; # valid with Message-Security-Assist-Extension 4
u8 ppno[16]; # valid with Message-Security-Assist-Extension 5
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
- u8 reserved[1808]; # reserved for future instructions
+ u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
+ u8 reserved[1792]; # reserved for future instructions
};
Parameters: address of a buffer to load the subfunction blocks from.
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
new file mode 100644
index 000000000000..9a24a4525253
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xive.txt
@@ -0,0 +1,197 @@
+POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
+==========================================================
+
+Device types supported:
+ KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
+
+This device acts as a VM interrupt controller. It provides the KVM
+interface to configure the interrupt sources of a VM in the underlying
+POWER9 XIVE interrupt controller.
+
+Only one XIVE instance may be instantiated. A guest XIVE device
+requires a POWER9 host and the guest OS should have support for the
+XIVE native exploitation interrupt mode. If not, it should run using
+the legacy interrupt mode, referred as XICS (POWER7/8).
+
+* Device Mappings
+
+ The KVM device exposes different MMIO ranges of the XIVE HW which
+ are required for interrupt management. These are exposed to the
+ guest in VMAs populated with a custom VM fault handler.
+
+ 1. Thread Interrupt Management Area (TIMA)
+
+ Each thread has an associated Thread Interrupt Management context
+ composed of a set of registers. These registers let the thread
+ handle priority management and interrupt acknowledgment. The most
+ important are :
+
+ - Interrupt Pending Buffer (IPB)
+ - Current Processor Priority (CPPR)
+ - Notification Source Register (NSR)
+
+ They are exposed to software in four different pages each proposing
+ a view with a different privilege. The first page is for the
+ physical thread context and the second for the hypervisor. Only the
+ third (operating system) and the fourth (user level) are exposed the
+ guest.
+
+ 2. Event State Buffer (ESB)
+
+ Each source is associated with an Event State Buffer (ESB) with
+ either a pair of even/odd pair of pages which provides commands to
+ manage the source: to trigger, to EOI, to turn off the source for
+ instance.
+
+ 3. Device pass-through
+
+ When a device is passed-through into the guest, the source
+ interrupts are from a different HW controller (PHB4) and the ESB
+ pages exposed to the guest should accommadate this change.
+
+ The passthru_irq helpers, kvmppc_xive_set_mapped() and
+ kvmppc_xive_clr_mapped() are called when the device HW irqs are
+ mapped into or unmapped from the guest IRQ number space. The KVM
+ device extends these helpers to clear the ESB pages of the guest IRQ
+ number being mapped and then lets the VM fault handler repopulate.
+ The handler will insert the ESB page corresponding to the HW
+ interrupt of the device being passed-through or the initial IPI ESB
+ page if the device has being removed.
+
+ The ESB remapping is fully transparent to the guest and the OS
+ device driver. All handling is done within VFIO and the above
+ helpers in KVM-PPC.
+
+* Groups:
+
+ 1. KVM_DEV_XIVE_GRP_CTRL
+ Provides global controls on the device
+ Attributes:
+ 1.1 KVM_DEV_XIVE_RESET (write only)
+ Resets the interrupt controller configuration for sources and event
+ queues. To be used by kexec and kdump.
+ Errors: none
+
+ 1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
+ Sync all the sources and queues and mark the EQ pages dirty. This
+ to make sure that a consistent memory state is captured when
+ migrating the VM.
+ Errors: none
+
+ 2. KVM_DEV_XIVE_GRP_SOURCE (write only)
+ Initializes a new source in the XIVE device and mask it.
+ Attributes:
+ Interrupt source number (64-bit)
+ The kvm_device_attr.addr points to a __u64 value:
+ bits: | 63 .... 2 | 1 | 0
+ values: | unused | level | type
+ - type: 0:MSI 1:LSI
+ - level: assertion level in case of an LSI.
+ Errors:
+ -E2BIG: Interrupt source number is out of range
+ -ENOMEM: Could not create a new source block
+ -EFAULT: Invalid user pointer for attr->addr.
+ -ENXIO: Could not allocate underlying HW interrupt
+
+ 3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
+ Configures source targeting
+ Attributes:
+ Interrupt source number (64-bit)
+ The kvm_device_attr.addr points to a __u64 value:
+ bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
+ values: | eisn | mask | server | priority
+ - priority: 0-7 interrupt priority level
+ - server: CPU number chosen to handle the interrupt
+ - mask: mask flag (unused)
+ - eisn: Effective Interrupt Source Number
+ Errors:
+ -ENOENT: Unknown source number
+ -EINVAL: Not initialized source number
+ -EINVAL: Invalid priority
+ -EINVAL: Invalid CPU number.
+ -EFAULT: Invalid user pointer for attr->addr.
+ -ENXIO: CPU event queues not configured or configuration of the
+ underlying HW interrupt failed
+ -EBUSY: No CPU available to serve interrupt
+
+ 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
+ Configures an event queue of a CPU
+ Attributes:
+ EQ descriptor identifier (64-bit)
+ The EQ descriptor identifier is a tuple (server, priority) :
+ bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
+ values: | unused | server | priority
+ The kvm_device_attr.addr points to :
+ struct kvm_ppc_xive_eq {
+ __u32 flags;
+ __u32 qshift;
+ __u64 qaddr;
+ __u32 qtoggle;
+ __u32 qindex;
+ __u8 pad[40];
+ };
+ - flags: queue flags
+ KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
+ forces notification without using the coalescing mechanism
+ provided by the XIVE END ESBs.
+ - qshift: queue size (power of 2)
+ - qaddr: real address of queue
+ - qtoggle: current queue toggle bit
+ - qindex: current queue index
+ - pad: reserved for future use
+ Errors:
+ -ENOENT: Invalid CPU number
+ -EINVAL: Invalid priority
+ -EINVAL: Invalid flags
+ -EINVAL: Invalid queue size
+ -EINVAL: Invalid queue address
+ -EFAULT: Invalid user pointer for attr->addr.
+ -EIO: Configuration of the underlying HW failed
+
+ 5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
+ Synchronize the source to flush event notifications
+ Attributes:
+ Interrupt source number (64-bit)
+ Errors:
+ -ENOENT: Unknown source number
+ -EINVAL: Not initialized source number
+
+* VCPU state
+
+ The XIVE IC maintains VP interrupt state in an internal structure
+ called the NVT. When a VP is not dispatched on a HW processor
+ thread, this structure can be updated by HW if the VP is the target
+ of an event notification.
+
+ It is important for migration to capture the cached IPB from the NVT
+ as it synthesizes the priorities of the pending interrupts. We
+ capture a bit more to report debug information.
+
+ KVM_REG_PPC_VP_STATE (2 * 64bits)
+ bits: | 63 .... 32 | 31 .... 0 |
+ values: | TIMA word0 | TIMA word1 |
+ bits: | 127 .......... 64 |
+ values: | unused |
+
+* Migration:
+
+ Saving the state of a VM using the XIVE native exploitation mode
+ should follow a specific sequence. When the VM is stopped :
+
+ 1. Mask all sources (PQ=01) to stop the flow of events.
+
+ 2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
+ flush any in-flight event notification and to stabilize the EQs. At
+ this stage, the EQ pages are marked dirty to make sure they are
+ transferred in the migration sequence.
+
+ 3. Capture the state of the source targeting, the EQs configuration
+ and the state of thread interrupt context registers.
+
+ Restore is similar :
+
+ 1. Restore the EQ configuration. As targeting depends on it.
+ 2. Restore targeting
+ 3. Restore the thread interrupt contexts
+ 4. Restore the source states
+ 5. Let the vCPU run
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index f365102c80f5..2efe0efc516e 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -142,7 +142,7 @@ Shadow pages contain the following information:
If clear, this page corresponds to a guest page table denoted by the gfn
field.
role.quadrant:
- When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit
+ When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
sptes. That means a guest page table contains more ptes than the host,
so multiple shadow pages are needed to shadow one guest page.
For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
@@ -158,9 +158,9 @@ Shadow pages contain the following information:
The page is invalid and should not be used. It is a root page that is
currently pinned (by a cpu hardware register pointing to it); once it is
unpinned it will be destroyed.
- role.cr4_pae:
- Contains the value of cr4.pae for which the page is valid (e.g. whether
- 32-bit or 64-bit gptes are in use).
+ role.gpte_is_8_bytes:
+ Reflects the size of the guest PTE for which the page is valid, i.e. '1'
+ if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
role.nxe:
Contains the value of efer.nxe for which the page is valid.
role.cr0_wp:
@@ -173,6 +173,9 @@ Shadow pages contain the following information:
Contains the value of cr4.smap && !cr0.wp for which the page is valid
(pages for which this is true are different from other pages; see the
treatment of cr0.wp=0 below).
+ role.ept_sp:
+ This is a virtual flag to denote a shadowed nested EPT page. ept_sp
+ is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination.
role.smm:
Is 1 if the page is valid in system management mode. This field
determines which of the kvm_memslots array was used to build this
diff --git a/MAINTAINERS b/MAINTAINERS
index 3e5a5d263f29..27b0de13506c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1893,14 +1893,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
ARM/NUVOTON NPCM ARCHITECTURE
M: Avi Fishman <avifishman70@gmail.com>
M: Tomer Maimon <tmaimon77@gmail.com>
+M: Tali Perry <tali.perry1@gmail.com>
R: Patrick Venture <venture@google.com>
R: Nancy Yuen <yuenn@google.com>
-R: Brendan Higgins <brendanhiggins@google.com>
+R: Benjamin Fair <benjaminfair@google.com>
L: openbmc@lists.ozlabs.org (moderated for non-subscribers)
S: Supported
F: arch/arm/mach-npcm/
F: arch/arm/boot/dts/nuvoton-npcm*
-F: include/dt-bindings/clock/nuvoton,npcm7xx-clks.h
+F: include/dt-bindings/clock/nuvoton,npcm7xx-clock.h
F: drivers/*/*npcm*
F: Documentation/devicetree/bindings/*/*npcm*
F: Documentation/devicetree/bindings/*/*/*npcm*
@@ -2356,7 +2357,7 @@ F: arch/arm/mm/cache-uniphier.c
F: arch/arm64/boot/dts/socionext/uniphier*
F: drivers/bus/uniphier-system-bus.c
F: drivers/clk/uniphier/
-F: drivers/dmaengine/uniphier-mdmac.c
+F: drivers/dma/uniphier-mdmac.c
F: drivers/gpio/gpio-uniphier.c
F: drivers/i2c/busses/i2c-uniphier*
F: drivers/irqchip/irq-uniphier-aidet.c
@@ -4129,7 +4130,7 @@ F: drivers/cpuidle/*
F: include/linux/cpuidle.h
CRAMFS FILESYSTEM
-M: Nicolas Pitre <nico@linaro.org>
+M: Nicolas Pitre <nico@fluxnic.net>
S: Maintained
F: Documentation/filesystems/cramfs.txt
F: fs/cramfs/
@@ -5833,7 +5834,7 @@ L: netdev@vger.kernel.org
S: Maintained
F: Documentation/ABI/testing/sysfs-bus-mdio
F: Documentation/devicetree/bindings/net/mdio*
-F: Documentation/networking/phy.txt
+F: Documentation/networking/phy.rst
F: drivers/net/phy/
F: drivers/of/of_mdio.c
F: drivers/of/of_net.c
@@ -6408,7 +6409,6 @@ L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
S: Maintained
F: kernel/futex.c
-F: kernel/futex_compat.c
F: include/asm-generic/futex.h
F: include/linux/futex.h
F: include/uapi/linux/futex.h
@@ -7516,7 +7516,7 @@ F: include/net/mac802154.h
F: include/net/af_ieee802154.h
F: include/net/cfg802154.h
F: include/net/ieee802154_netdev.h
-F: Documentation/networking/ieee802154.txt
+F: Documentation/networking/ieee802154.rst
IFE PROTOCOL
M: Yotam Gigi <yotam.gi@gmail.com>
@@ -13982,7 +13982,7 @@ F: drivers/media/rc/serial_ir.c
SFC NETWORK DRIVER
M: Solarflare linux maintainers <linux-net-drivers@solarflare.com>
M: Edward Cree <ecree@solarflare.com>
-M: Bert Kenward <bkenward@solarflare.com>
+M: Martin Habets <mhabets@solarflare.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/sfc/
diff --git a/Makefile b/Makefile
index c0a34064c574..15c8251d4d5e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 1
SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc4
NAME = Shy Crocodile
# *DOCUMENTATION*
@@ -31,26 +31,12 @@ _all:
# descending is started. They are now explicitly listed as the
# prepare rule.
-# Ugly workaround for Debian make-kpkg:
-# make-kpkg directly includes the top Makefile of Linux kernel. In such a case,
-# skip sub-make to support debian_* targets in ruleset/kernel_version.mk, but
-# displays warning to discourage such abusage.
-ifneq ($(word 2, $(MAKEFILE_LIST)),)
-$(warning Do not include top Makefile of Linux Kernel)
-sub-make-done := 1
-MAKEFLAGS += -rR
-endif
-
-ifneq ($(sub-make-done),1)
+ifneq ($(sub_make_done),1)
# Do not use make's built-in rules and variables
# (this increases performance and avoids hard-to-debug behaviour)
MAKEFLAGS += -rR
-# 'MAKEFLAGS += -rR' does not become immediately effective for old
-# GNU Make versions. Cancel implicit rules for this Makefile.
-$(lastword $(MAKEFILE_LIST)): ;
-
# Avoid funny character set dependencies
unexport LC_ALL
LC_COLLATE=C
@@ -153,6 +139,7 @@ $(if $(KBUILD_OUTPUT),, \
# 'sub-make' below.
MAKEFLAGS += --include-dir=$(CURDIR)
+need-sub-make := 1
else
# Do not print "Entering directory ..." at all for in-tree build.
@@ -160,6 +147,18 @@ MAKEFLAGS += --no-print-directory
endif # ifneq ($(KBUILD_OUTPUT),)
+ifneq ($(filter 3.%,$(MAKE_VERSION)),)
+# 'MAKEFLAGS += -rR' does not immediately become effective for GNU Make 3.x
+# We need to invoke sub-make to avoid implicit rules in the top Makefile.
+need-sub-make := 1
+# Cancel implicit rules for this Makefile.
+$(lastword $(MAKEFILE_LIST)): ;
+endif
+
+export sub_make_done := 1
+
+ifeq ($(need-sub-make),1)
+
PHONY += $(MAKECMDGOALS) sub-make
$(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make
@@ -167,12 +166,15 @@ $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make
# Invoke a second make in the output directory, passing relevant variables
sub-make:
- $(Q)$(MAKE) sub-make-done=1 \
+ $(Q)$(MAKE) \
$(if $(KBUILD_OUTPUT),-C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR)) \
-f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS))
-else # sub-make-done
+endif # need-sub-make
+endif # sub_make_done
+
# We process the rest of the Makefile if this is the final invocation of make
+ifeq ($(need-sub-make),)
# Do not print "Entering directory ...",
# but we want to display it when entering to the output directory
@@ -497,7 +499,8 @@ outputmakefile:
ifneq ($(KBUILD_SRC),)
$(Q)ln -fsn $(srctree) source
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree)
- $(Q){ echo "# this is build directory, ignore it"; echo "*"; } > .gitignore
+ $(Q)test -e .gitignore || \
+ { echo "# this is build directory, ignore it"; echo "*"; } > .gitignore
endif
ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
@@ -677,7 +680,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS += $(call cc-option,-Oz,-Os)
+KBUILD_CFLAGS += -Os
else
KBUILD_CFLAGS += -O2
endif
@@ -950,9 +953,11 @@ mod_sign_cmd = true
endif
export mod_sign_cmd
+HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
+
ifdef CONFIG_STACK_VALIDATION
has_libelf := $(call try-run,\
- echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0)
+ echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0)
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
@@ -1757,7 +1762,7 @@ existing-targets := $(wildcard $(sort $(targets)))
endif # ifeq ($(config-targets),1)
endif # ifeq ($(mixed-targets),1)
-endif # sub-make-done
+endif # need-sub-make
PHONY += FORCE
FORCE:
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index dc0ab28baca1..70b783333965 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += exec.h
generic-y += export.h
generic-y += fb.h
generic-y += irq_work.h
+generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
diff --git a/arch/alpha/include/uapi/asm/kvm_para.h b/arch/alpha/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/alpha/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index b41f8881ecc8..decc306a3b52 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 29de09804306..c7a4201ed62b 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
*/
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
unsigned long *inside_ptregs = &(regs->r0);
- inside_ptregs -= i;
-
- BUG_ON((i + n) > 6);
+ unsigned int n = 6;
+ unsigned int i = 0;
while (n--) {
args[i++] = (*inside_ptregs);
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 054ead960f98..850b4805e2d1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -596,6 +596,7 @@ config ARCH_DAVINCI
select HAVE_IDE
select PM_GENERIC_DOMAINS if PM
select PM_GENERIC_DOMAINS_OF if PM && OF
+ select REGMAP_MMIO
select RESET_CONTROLLER
select SPARSE_IRQ
select USE_OF
diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts
index dce5be5df97b..edcff79879e7 100644
--- a/arch/arm/boot/dts/am335x-evm.dts
+++ b/arch/arm/boot/dts/am335x-evm.dts
@@ -57,6 +57,24 @@
enable-active-high;
};
+ /* TPS79501 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS79501 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
matrix_keypad: matrix_keypad0 {
compatible = "gpio-matrix-keypad";
debounce-delay-ms = <5>;
@@ -499,10 +517,10 @@
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index b128998097ce..2c2d8b5b8cf5 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -73,6 +73,24 @@
enable-active-high;
};
+ /* TPS79518 */
+ v1_8d_reg: fixedregulator-v1_8d {
+ compatible = "regulator-fixed";
+ regulator-name = "v1_8d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ /* TPS78633 */
+ v3_3d_reg: fixedregulator-v3_3d {
+ compatible = "regulator-fixed";
+ regulator-name = "v3_3d";
+ vin-supply = <&vbat>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
leds {
pinctrl-names = "default";
pinctrl-0 = <&user_leds_s0>;
@@ -501,10 +519,10 @@
status = "okay";
/* Regulators */
- AVDD-supply = <&vaux2_reg>;
- IOVDD-supply = <&vaux2_reg>;
- DRVDD-supply = <&vaux2_reg>;
- DVDD-supply = <&vbat>;
+ AVDD-supply = <&v3_3d_reg>;
+ IOVDD-supply = <&v3_3d_reg>;
+ DRVDD-supply = <&v3_3d_reg>;
+ DVDD-supply = <&v1_8d_reg>;
};
};
diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi
index f459ec316a22..ca6d9f02a800 100644
--- a/arch/arm/boot/dts/am33xx-l4.dtsi
+++ b/arch/arm/boot/dts/am33xx-l4.dtsi
@@ -1762,7 +1762,7 @@
reg = <0xcc000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -1785,7 +1785,7 @@
reg = <0xd0000 0x4>;
reg-names = "rev";
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
- clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>;
+ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts
index 5641d162dfdb..28e7513ce617 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts
@@ -93,7 +93,7 @@
};
&hdmi {
- hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>;
+ hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>;
};
&pwm {
diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
index b715ab0fa1ff..e8d800fec637 100644
--- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
+++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
@@ -114,9 +114,9 @@
reg = <2>;
};
- switch@0 {
+ switch@10 {
compatible = "qca,qca8334";
- reg = <0>;
+ reg = <10>;
switch_ports: ports {
#address-cells = <1>;
@@ -125,7 +125,7 @@
ethphy0: port@0 {
reg = <0>;
label = "cpu";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
ethernet = <&fec>;
fixed-link {
diff --git a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi
index 1d1b4bd0670f..a4217f564a53 100644
--- a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi
@@ -264,7 +264,7 @@
pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
vmcc-supply = <&reg_sd3_vmmc>;
cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
- bus-witdh = <4>;
+ bus-width = <4>;
no-1-8-v;
status = "okay";
};
@@ -275,7 +275,7 @@
pinctrl-1 = <&pinctrl_usdhc4_100mhz>;
pinctrl-2 = <&pinctrl_usdhc4_200mhz>;
vmcc-supply = <&reg_sd4_vmmc>;
- bus-witdh = <8>;
+ bus-width = <8>;
no-1-8-v;
non-removable;
status = "okay";
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
index 433bf09a1954..027df06c5dc7 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
@@ -91,6 +91,7 @@
pinctrl-0 = <&pinctrl_enet>;
phy-handle = <&ethphy>;
phy-mode = "rgmii";
+ phy-reset-duration = <10>; /* in msecs */
phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>;
phy-supply = <&vdd_eth_io_reg>;
status = "disabled";
diff --git a/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h b/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h
index f6fb6783c193..54cfe72295aa 100644
--- a/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h
+++ b/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2016 Freescale Semiconductor, Inc.
* Copyright (C) 2017 NXP
diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi
index aa107ee41b8b..ef653c3209bc 100644
--- a/arch/arm/boot/dts/rk3288-tinker.dtsi
+++ b/arch/arm/boot/dts/rk3288-tinker.dtsi
@@ -254,6 +254,7 @@
};
vccio_sd: LDO_REG5 {
+ regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vccio_sd";
@@ -430,7 +431,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
- card-detect-delay = <200>;
+ broken-cd;
disable-wp; /* wp not hooked up */
pinctrl-names = "default";
pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 0bc2409f6903..192dbc089ade 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -25,8 +25,6 @@
gpio_keys: gpio-keys {
compatible = "gpio-keys";
- #address-cells = <1>;
- #size-cells = <0>;
pinctrl-names = "default";
pinctrl-0 = <&pwr_key_l>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index ca7d52daa8fb..a024d1e7e74c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -70,7 +70,7 @@
compatible = "arm,cortex-a12";
reg = <0x501>;
resets = <&cru SRST_CORE1>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -80,7 +80,7 @@
compatible = "arm,cortex-a12";
reg = <0x502>;
resets = <&cru SRST_CORE2>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -90,7 +90,7 @@
compatible = "arm,cortex-a12";
reg = <0x503>;
resets = <&cru SRST_CORE3>;
- operating-points = <&cpu_opp_table>;
+ operating-points-v2 = <&cpu_opp_table>;
#cooling-cells = <2>; /* min followed by max */
clock-latency = <40000>;
clocks = <&cru ARMCLK>;
@@ -1119,8 +1119,6 @@
clock-names = "ref", "pclk";
power-domains = <&power RK3288_PD_VIO>;
rockchip,grf = <&grf>;
- #address-cells = <1>;
- #size-cells = <0>;
status = "disabled";
ports {
@@ -1282,27 +1280,27 @@
gpu_opp_table: gpu-opp-table {
compatible = "operating-points-v2";
- opp@100000000 {
+ opp-100000000 {
opp-hz = /bits/ 64 <100000000>;
opp-microvolt = <950000>;
};
- opp@200000000 {
+ opp-200000000 {
opp-hz = /bits/ 64 <200000000>;
opp-microvolt = <950000>;
};
- opp@300000000 {
+ opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
opp-microvolt = <1000000>;
};
- opp@400000000 {
+ opp-400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <1100000>;
};
- opp@500000000 {
+ opp-500000000 {
opp-hz = /bits/ 64 <500000000>;
opp-microvolt = <1200000>;
};
- opp@600000000 {
+ opp-600000000 {
opp-hz = /bits/ 64 <600000000>;
opp-microvolt = <1250000>;
};
diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
index 1c01a6f843d8..28a2e45752fe 100644
--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
+++ b/arch/arm/boot/dts/sama5d2-pinfunc.h
@@ -518,7 +518,7 @@
#define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0)
#define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3)
#define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1)
-#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1)
+#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1)
#define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2)
#define PIN_PC10 74
#define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0)
diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index 8661dd9b064a..b37f8e675e40 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -170,6 +170,9 @@ CONFIG_IMX_SDMA=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_IIO=y
CONFIG_FSL_MX25_ADC=y
+CONFIG_PWM=y
+CONFIG_PWM_IMX1=y
+CONFIG_PWM_IMX27=y
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_VFAT_FS=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 5586a5074a96..50fb01d70b10 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -398,7 +398,7 @@ CONFIG_MAG3110=y
CONFIG_MPL3115=y
CONFIG_PWM=y
CONFIG_PWM_FSL_FTM=y
-CONFIG_PWM_IMX=y
+CONFIG_PWM_IMX27=y
CONFIG_NVMEM_IMX_OCOTP=y
CONFIG_NVMEM_VF610_OCOTP=y
CONFIG_TEE=y
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 2de96a180166..31de4ab93005 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -381,6 +381,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm,
return ret;
}
+static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
+ const void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_write_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
static inline void *kvm_get_hyp_vector(void)
{
switch(read_cpuid_part()) {
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index de2089501b8b..9e11dce55e06 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -75,6 +75,8 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm)
#define S2_PMD_MASK PMD_MASK
#define S2_PMD_SIZE PMD_SIZE
+#define S2_PUD_MASK PUD_MASK
+#define S2_PUD_SIZE PUD_SIZE
static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
{
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 06dea6bce293..080ce70cab12 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -55,53 +55,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- args[0] = regs->ARM_ORIG_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->ARM_r0 + i, n * sizeof(args[0]));
+ args[0] = regs->ARM_ORIG_r0;
+ args++;
+
+ memcpy(args, &regs->ARM_r0 + 1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->ARM_ORIG_r0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
+ regs->ARM_ORIG_r0 = args[0];
+ args++;
+
+ memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 23b4464c0995..ce8573157774 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -3,3 +3,4 @@
generated-y += unistd-common.h
generated-y += unistd-oabi.h
generated-y += unistd-eabi.h
+generic-y += kvm_para.h
diff --git a/arch/arm/include/uapi/asm/kvm_para.h b/arch/arm/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/arm/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 51e808adb00c..2a757dcaa1a5 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void)
np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam");
if (!np)
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
pdev = of_find_device_by_node(np);
of_node_put(np);
if (!pdev) {
pr_warn("%s: failed to find securam device!\n", __func__);
- goto securam_fail;
+ goto securam_fail_no_ref_dev;
}
sram_pool = gen_pool_get(&pdev->dev, NULL);
@@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void)
return 0;
securam_fail:
+ put_device(&pdev->dev);
+securam_fail_no_ref_dev:
iounmap(pm_data.sfrbu);
pm_data.sfrbu = NULL;
return ret;
diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index bfeb25aaf9a2..326e870d7123 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -16,30 +16,23 @@
#include "cpuidle.h"
#include "hardware.h"
-static atomic_t master = ATOMIC_INIT(0);
-static DEFINE_SPINLOCK(master_lock);
+static int num_idle_cpus = 0;
+static DEFINE_SPINLOCK(cpuidle_lock);
static int imx6q_enter_wait(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- if (atomic_inc_return(&master) == num_online_cpus()) {
- /*
- * With this lock, we prevent other cpu to exit and enter
- * this function again and become the master.
- */
- if (!spin_trylock(&master_lock))
- goto idle;
+ spin_lock(&cpuidle_lock);
+ if (++num_idle_cpus == num_online_cpus())
imx6_set_lpm(WAIT_UNCLOCKED);
- cpu_do_idle();
- imx6_set_lpm(WAIT_CLOCKED);
- spin_unlock(&master_lock);
- goto done;
- }
+ spin_unlock(&cpuidle_lock);
-idle:
cpu_do_idle();
-done:
- atomic_dec(&master);
+
+ spin_lock(&cpuidle_lock);
+ if (num_idle_cpus-- == num_online_cpus())
+ imx6_set_lpm(WAIT_CLOCKED);
+ spin_unlock(&cpuidle_lock);
return index;
}
diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c
index c7169c2f94c4..08c7892866c2 100644
--- a/arch/arm/mach-imx/mach-imx51.c
+++ b/arch/arm/mach-imx/mach-imx51.c
@@ -59,6 +59,7 @@ static void __init imx51_m4if_setup(void)
return;
m4if_base = of_iomap(np, 0);
+ of_node_put(np);
if (!m4if_base) {
pr_err("Unable to map M4IF registers\n");
return;
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index 53c316f7301e..fe4932fda01d 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = {
}
};
-static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64);
+static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32);
static struct iop_adma_platform_data iop13xx_adma_0_data = {
.hw_id = 0,
.pool_size = PAGE_SIZE,
@@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = {
.resource = iop13xx_adma_0_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_0_data,
},
};
@@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = {
.resource = iop13xx_adma_1_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_1_data,
},
};
@@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = {
.resource = iop13xx_adma_2_resources,
.dev = {
.dma_mask = &iop13xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop13xx_adma_2_data,
},
};
diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c
index db511ec2b1df..116feb6b261e 100644
--- a/arch/arm/mach-iop13xx/tpmi.c
+++ b/arch/arm/mach-iop13xx/tpmi.c
@@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = {
}
};
-u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64);
+u64 iop13xx_tpmi_mask = DMA_BIT_MASK(32);
static struct platform_device iop13xx_tpmi_0_device = {
.name = "iop-tpmi",
.id = 0,
@@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = {
.resource = iop13xx_tpmi_0_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = {
.resource = iop13xx_tpmi_1_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = {
.resource = iop13xx_tpmi_2_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = {
.resource = iop13xx_tpmi_3_resources,
.dev = {
.dma_mask = &iop13xx_tpmi_mask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
diff --git a/arch/arm/mach-milbeaut/platsmp.c b/arch/arm/mach-milbeaut/platsmp.c
index 591543c81399..3ea880f5fcb7 100644
--- a/arch/arm/mach-milbeaut/platsmp.c
+++ b/arch/arm/mach-milbeaut/platsmp.c
@@ -65,6 +65,7 @@ static void m10v_smp_init(unsigned int max_cpus)
writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4);
}
+#ifdef CONFIG_HOTPLUG_CPU
static void m10v_cpu_die(unsigned int l_cpu)
{
gic_cpu_if_down(0);
@@ -83,12 +84,15 @@ static int m10v_cpu_kill(unsigned int l_cpu)
return 1;
}
+#endif
static struct smp_operations m10v_smp_ops __initdata = {
.smp_prepare_cpus = m10v_smp_init,
.smp_boot_secondary = m10v_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = m10v_cpu_die,
.cpu_kill = m10v_cpu_kill,
+#endif
};
CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops);
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index be30c3c061b4..1b15d593837e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -182,6 +182,7 @@ static struct resource latch1_resources[] = {
static struct bgpio_pdata latch1_pdata = {
.label = LATCH1_LABEL,
+ .base = -1,
.ngpio = LATCH1_NGPIO,
};
@@ -219,6 +220,7 @@ static struct resource latch2_resources[] = {
static struct bgpio_pdata latch2_pdata = {
.label = LATCH2_LABEL,
+ .base = -1,
.ngpio = LATCH2_NGPIO,
};
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 1444b4b4bd9f..439e143cad7b 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -250,8 +250,10 @@ static int __init omapdss_init_of(void)
if (!node)
return 0;
- if (!of_device_is_available(node))
+ if (!of_device_is_available(node)) {
+ of_node_put(node);
return 0;
+ }
pdev = of_find_device_by_node(node);
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index a4d1f8de3b5b..d9612221e484 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = {
.resource = iop3xx_dma_0_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_0_data,
},
};
@@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = {
.resource = iop3xx_dma_1_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_dma_1_data,
},
};
@@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = {
.resource = iop3xx_aau_resources,
.dev = {
.dma_mask = &iop3xx_adma_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = (void *) &iop3xx_aau_data,
},
};
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index a6c81ce00f52..8647cb80a93b 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = {
.resource = orion_xor0_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor0_pdata,
},
};
@@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = {
.resource = orion_xor1_shared_resources,
.dev = {
.dma_mask = &orion_xor_dmamask,
- .coherent_dma_mask = DMA_BIT_MASK(64),
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.platform_data = &orion_xor1_pdata,
},
};
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 70498a033cf5..b5ca9c50876d 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -27,6 +27,7 @@ config ARCH_BCM2835
bool "Broadcom BCM2835 family"
select TIMER_OF
select GPIOLIB
+ select MFD_CORE
select PINCTRL
select PINCTRL_BCM2835
select ARM_AMBA
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 7c649f6b14cb..cd7c76e58b09 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -162,6 +162,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 1>;
+ altr,sysmgr-syscon = <&sysmgr 0x44 0>;
status = "disabled";
};
@@ -179,6 +180,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 2>;
+ altr,sysmgr-syscon = <&sysmgr 0x48 0>;
status = "disabled";
};
@@ -196,6 +198,7 @@
rx-fifo-depth = <16384>;
snps,multicast-filter-bins = <256>;
iommus = <&smmu 3>;
+ altr,sysmgr-syscon = <&sysmgr 0x4c 0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index bb2045be8814..97aeb946ed5e 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -321,7 +321,6 @@
nvidia,default-trim = <0x9>;
nvidia,dqs-trim = <63>;
mmc-hs400-1_8v;
- supports-cqe;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index 61a0afb74e63..1ea684af99c4 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for the RZ/G2E (R8A774C0) SoC
*
- * Copyright (C) 2018 Renesas Electronics Corp.
+ * Copyright (C) 2018-2019 Renesas Electronics Corp.
*/
#include <dt-bindings/clock/r8a774c0-cpg-mssr.h>
@@ -1150,9 +1150,8 @@
<&cpg CPG_CORE R8A774C0_CLK_S3D1C>,
<&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
- dmas = <&dmac1 0x5b>, <&dmac1 0x5a>,
- <&dmac2 0x5b>, <&dmac2 0x5a>;
- dma-names = "tx", "rx", "tx", "rx";
+ dmas = <&dmac0 0x5b>, <&dmac0 0x5a>;
+ dma-names = "tx", "rx";
power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>;
resets = <&cpg 202>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index a69faa60ea4d..d2ad665fe2d9 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -2,7 +2,7 @@
/*
* Device Tree Source for the R-Car E3 (R8A77990) SoC
*
- * Copyright (C) 2018 Renesas Electronics Corp.
+ * Copyright (C) 2018-2019 Renesas Electronics Corp.
*/
#include <dt-bindings/clock/r8a77990-cpg-mssr.h>
@@ -1067,9 +1067,8 @@
<&cpg CPG_CORE R8A77990_CLK_S3D1C>,
<&scif_clk>;
clock-names = "fck", "brg_int", "scif_clk";
- dmas = <&dmac1 0x5b>, <&dmac1 0x5a>,
- <&dmac2 0x5b>, <&dmac2 0x5a>;
- dma-names = "tx", "rx", "tx", "rx";
+ dmas = <&dmac0 0x5b>, <&dmac0 0x5a>;
+ dma-names = "tx", "rx";
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
resets = <&cpg 202>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
index 33c44e857247..0e34354b2092 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts
@@ -108,8 +108,8 @@
snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,reset-delays-us = <0 10000 50000>;
- tx_delay = <0x25>;
- rx_delay = <0x11>;
+ tx_delay = <0x24>;
+ rx_delay = <0x18>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index 2157a528276b..79b4d1d4b5d6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -46,8 +46,7 @@
vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator {
compatible = "regulator-fixed";
- enable-active-high;
- gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
+ gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
pinctrl-names = "default";
pinctrl-0 = <&usb20_host_drv>;
regulator-name = "vcc_host1_5v";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 84f14b132e8f..dabef1a21649 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -1445,11 +1445,11 @@
sdmmc0 {
sdmmc0_clk: sdmmc0-clk {
- rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>;
+ rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>;
};
sdmmc0_cmd: sdmmc0-cmd {
- rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>;
};
sdmmc0_dectn: sdmmc0-dectn {
@@ -1461,14 +1461,14 @@
};
sdmmc0_bus1: sdmmc0-bus1 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>;
};
sdmmc0_bus4: sdmmc0-bus4 {
- rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>,
- <1 RK_PA1 1 &pcfg_pull_up_4ma>,
- <1 RK_PA2 1 &pcfg_pull_up_4ma>,
- <1 RK_PA3 1 &pcfg_pull_up_4ma>;
+ rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA1 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA2 1 &pcfg_pull_up_8ma>,
+ <1 RK_PA3 1 &pcfg_pull_up_8ma>;
};
sdmmc0_gpio: sdmmc0-gpio {
@@ -1642,50 +1642,50 @@
rgmiim1_pins: rgmiim1-pins {
rockchip,pins =
/* mac_txclk */
- <1 RK_PB4 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB4 2 &pcfg_pull_none_8ma>,
/* mac_rxclk */
- <1 RK_PB5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB5 2 &pcfg_pull_none_4ma>,
/* mac_mdio */
- <1 RK_PC3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC3 2 &pcfg_pull_none_4ma>,
/* mac_txen */
- <1 RK_PD1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PD1 2 &pcfg_pull_none_8ma>,
/* mac_clk */
- <1 RK_PC5 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC5 2 &pcfg_pull_none_4ma>,
/* mac_rxdv */
- <1 RK_PC6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC6 2 &pcfg_pull_none_4ma>,
/* mac_mdc */
- <1 RK_PC7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PC7 2 &pcfg_pull_none_4ma>,
/* mac_rxd1 */
- <1 RK_PB2 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB2 2 &pcfg_pull_none_4ma>,
/* mac_rxd0 */
- <1 RK_PB3 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB3 2 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <1 RK_PB0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB0 2 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <1 RK_PB1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PB1 2 &pcfg_pull_none_8ma>,
/* mac_rxd3 */
- <1 RK_PB6 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB6 2 &pcfg_pull_none_4ma>,
/* mac_rxd2 */
- <1 RK_PB7 2 &pcfg_pull_none_2ma>,
+ <1 RK_PB7 2 &pcfg_pull_none_4ma>,
/* mac_txd3 */
- <1 RK_PC0 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC0 2 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <1 RK_PC1 2 &pcfg_pull_none_12ma>,
+ <1 RK_PC1 2 &pcfg_pull_none_8ma>,
/* mac_txclk */
- <0 RK_PB0 1 &pcfg_pull_none>,
+ <0 RK_PB0 1 &pcfg_pull_none_8ma>,
/* mac_txen */
- <0 RK_PB4 1 &pcfg_pull_none>,
+ <0 RK_PB4 1 &pcfg_pull_none_8ma>,
/* mac_clk */
- <0 RK_PD0 1 &pcfg_pull_none>,
+ <0 RK_PD0 1 &pcfg_pull_none_4ma>,
/* mac_txd1 */
- <0 RK_PC0 1 &pcfg_pull_none>,
+ <0 RK_PC0 1 &pcfg_pull_none_8ma>,
/* mac_txd0 */
- <0 RK_PC1 1 &pcfg_pull_none>,
+ <0 RK_PC1 1 &pcfg_pull_none_8ma>,
/* mac_txd3 */
- <0 RK_PC7 1 &pcfg_pull_none>,
+ <0 RK_PC7 1 &pcfg_pull_none_8ma>,
/* mac_txd2 */
- <0 RK_PC6 1 &pcfg_pull_none>;
+ <0 RK_PC6 1 &pcfg_pull_none_8ma>;
};
rmiim1_pins: rmiim1-pins {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
index 4a543f2117d4..844eac939a97 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts
@@ -158,6 +158,7 @@
};
&hdmi {
+ ddc-i2c-bus = <&i2c3>;
pinctrl-names = "default";
pinctrl-0 = <&hdmi_cec>;
status = "okay";
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b0742a16c6c9..ebeefcf835e8 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -445,6 +445,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm,
return ret;
}
+static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
+ const void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_write_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
#ifdef CONFIG_KVM_INDIRECT_VECTORS
/*
* EL2 vectors can be mapped and rerouted in a number of ways,
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index ad8be16a39c9..a179df3674a1 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -65,52 +65,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_x0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+ args[0] = regs->orig_x0;
+ args++;
+
+ memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_x0 = args[0];
- args++;
- i++;
- n--;
- }
-
- memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+ regs->orig_x0 = args[0];
+ args++;
+
+ memcpy(&regs->regs[1], args, 5 * sizeof(args[0]));
}
/*
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 5ba4465e44f0..ea94cf8f9dc6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
@@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
+ if (!low)
+ return false;
+
if (sp < low || sp >= high)
return false;
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f8482fe5a190..413d566405d1 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
num_standard_resources = memblock.memory.cnt;
res_size = num_standard_resources * sizeof(*standard_resources);
- standard_resources = memblock_alloc_low(res_size, SMP_CACHE_BYTES);
+ standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES);
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f0faf54f5857..1140b4485575 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -272,6 +272,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
int ret = -EINVAL;
bool loaded;
+ /* Reset PMU outside of the non-preemptible section */
+ kvm_pmu_vcpu_reset(vcpu);
+
preempt_disable();
loaded = (vcpu->cpu != -1);
if (loaded)
@@ -335,9 +338,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.reset_state.reset = false;
}
- /* Reset PMU */
- kvm_pmu_vcpu_reset(vcpu);
-
/* Default workaround setup is enabled (if supported) */
if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 63b4a1705182..249c9f6f26dc 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index ae2be315ee9c..15ba8599858e 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -46,78 +46,27 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->a4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->b4;
- case 2:
- if (!n--)
- break;
- *args++ = regs->a6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->b6;
- case 4:
- if (!n--)
- break;
- *args++ = regs->a8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->b8;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->a4;
+ *args++ = regs->b4;
+ *args++ = regs->a6;
+ *args++ = regs->b6;
+ *args++ = regs->a8;
+ *args = regs->b8;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->a4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->b4 = *args++;
- case 2:
- if (!n--)
- break;
- regs->a6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->b6 = *args++;
- case 4:
- if (!n--)
- break;
- regs->a8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->a9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->a4 = *args++;
+ regs->b4 = *args++;
+ regs->a6 = *args++;
+ regs->b6 = *args++;
+ regs->a8 = *args++;
+ regs->a9 = *args;
}
#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index d637445737b7..bda0a446c63e 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -43,30 +43,20 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 3e7c8ecf151e..e3dead402e5f 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += linkage.h
generic-y += local.h
generic-y += local64.h
diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h
index 924990401237..ddd483c6ca95 100644
--- a/arch/h8300/include/asm/syscall.h
+++ b/arch/h8300/include/asm/syscall.h
@@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- while (n > 0) {
- switch (i) {
- case 0:
- *args++ = regs->er1;
- break;
- case 1:
- *args++ = regs->er2;
- break;
- case 2:
- *args++ = regs->er3;
- break;
- case 3:
- *args++ = regs->er4;
- break;
- case 4:
- *args++ = regs->er5;
- break;
- case 5:
- *args++ = regs->er6;
- break;
- }
- i++;
- n--;
- }
+ *args++ = regs->er1;
+ *args++ = regs->er2;
+ *args++ = regs->er3;
+ *args++ = regs->er4;
+ *args++ = regs->er5;
+ *args = regs->er6;
}
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index b25fd42aa0f4..d046e8ccdf78 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index 4af9c7b6f13a..ae3a1e24fabd 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &(&regs->r00)[i], n * sizeof(args[0]));
+ memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0]));
}
#endif
diff --git a/arch/hexagon/include/uapi/asm/kvm_para.h b/arch/hexagon/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/hexagon/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 43e21fe3499c..11f191689c9e 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -2,6 +2,7 @@ generated-y += syscall_table.h
generic-y += compat.h
generic-y += exec.h
generic-y += irq_work.h
+generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 1d0b875fec44..0d9e7fab4a79 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -59,26 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
extern void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw);
+ struct pt_regs *regs, unsigned long *args, int rw);
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+ ia64_syscall_get_set_arguments(task, regs, args, 0);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+ ia64_syscall_get_set_arguments(task, regs, args, 1);
}
static inline int syscall_get_arch(void)
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 20018cb883a9..62a9522af51e 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
generated-y += unistd_64.h
-generic-y += kvm_para.h
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 6d50ede0ed69..bf9c24d9ce84 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
}
void ia64_syscall_get_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args, int rw)
+ struct pt_regs *regs, unsigned long *args, int rw)
{
struct syscall_get_set_args data = {
- .i = i,
- .n = n,
+ .i = 0,
+ .n = 6,
.args = args,
.regs = regs,
.rw = rw,
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 95f8f631c4df..2c359d9e80f6 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 8a7ad40be463..7417847dc438 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
generated-y += unistd_32.h
-generic-y += kvm_para.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 791cc8d54d0a..1a8285c3f693 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += linkage.h
generic-y += local.h
generic-y += local64.h
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 220decd605a4..833d3a53dab3 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -82,18 +82,22 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
*args++ = microblaze_get_syscall_arg(regs, i++);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
+
while (n--)
microblaze_set_syscall_arg(regs, i++, *args++);
}
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 3ce84fbb2678..13f59631c576 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -1,3 +1,2 @@
generated-y += unistd_32.h
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
index f607888d2483..184eb65a6ba7 100644
--- a/arch/mips/configs/generic/board-ocelot.config
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -1,6 +1,10 @@
# require CONFIG_CPU_MIPS32_R2=y
CONFIG_LEGACY_BOARD_OCELOT=y
+CONFIG_FIT_IMAGE_FDT_OCELOT=y
+
+CONFIG_BRIDGE=y
+CONFIG_GENERIC_PHY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
@@ -19,6 +23,8 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_NETDEVICES=y
+CONFIG_NET_SWITCHDEV=y
+CONFIG_NET_DSA=y
CONFIG_MSCC_OCELOT_SWITCH=y
CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y
CONFIG_MDIO_MSCC_MIIM=y
@@ -35,6 +41,8 @@ CONFIG_SPI_DESIGNWARE=y
CONFIG_SPI_DW_MMIO=y
CONFIG_SPI_SPIDEV=y
+CONFIG_PINCTRL_OCELOT=y
+
CONFIG_GPIO_SYSFS=y
CONFIG_POWER_RESET=y
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 6cf8ffb5367e..a2b4748655df 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
+ unsigned int i = 0;
+ unsigned int n = 6;
int ret;
/* O32 ABI syscall() */
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 6e574c02e4c3..ea781b29f7f1 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -33,6 +33,7 @@
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <linux/uaccess.h>
+#include <asm/irq_regs.h>
static struct hard_trap_info {
unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */
@@ -214,7 +215,7 @@ void kgdb_call_nmi_hook(void *ignored)
old_fs = get_fs();
set_fs(KERNEL_DS);
- kgdb_nmicallback(raw_smp_processor_id(), NULL);
+ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
set_fs(old_fs);
}
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0057c910bc2f..3a62f80958e1 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
sd.nr = syscall;
sd.arch = syscall_get_arch();
- syscall_get_arguments(current, regs, 0, 6, args);
+ syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
sd.args[i] = args[i];
sd.instruction_pointer = KSTK_EIP(current);
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 710a59764b01..a32f843cdbe0 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -118,7 +118,6 @@ static void shutdown_bridge_irq(struct irq_data *d)
{
struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
struct bridge_controller *bc;
- int pin = hd->pin;
if (!hd)
return;
@@ -126,7 +125,7 @@ static void shutdown_bridge_irq(struct irq_data *d)
disable_hub_irq(d);
bc = hd->bc;
- bridge_clr(bc, b_int_enable, (1 << pin));
+ bridge_clr(bc, b_int_enable, (1 << hd->pin));
bridge_read(bc, b_wid_tflush);
}
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index f7e5e86765fe..671ebd357496 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -108,81 +108,41 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call (from 0 through 5). The first
+ * argument is stored in @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
#define SYSCALL_MAX_ARGS 6
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- if (n == 0)
- return;
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_r0;
- args++;
- i++;
- n--;
- }
-
- memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0]));
+ args[0] = regs->orig_r0;
+ args++;
+ memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
}
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call. The first argument gets value
+ * @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (n == 0)
- return;
-
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warn("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
-
- if (i == 0) {
- regs->orig_r0 = args[0];
- args++;
- i++;
- n--;
- }
+ regs->orig_r0 = args[0];
+ args++;
- memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0]));
+ memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
}
#endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 8fde4fa2c34f..88a667d12aaa 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index 9de220854c4a..d7624ed06efb 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -58,81 +58,25 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
static inline void syscall_get_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- unsigned long *args)
+ struct pt_regs *regs, unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = regs->r4;
- case 1:
- if (!n--)
- break;
- *args++ = regs->r5;
- case 2:
- if (!n--)
- break;
- *args++ = regs->r6;
- case 3:
- if (!n--)
- break;
- *args++ = regs->r7;
- case 4:
- if (!n--)
- break;
- *args++ = regs->r8;
- case 5:
- if (!n--)
- break;
- *args++ = regs->r9;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- }
+ *args++ = regs->r4;
+ *args++ = regs->r5;
+ *args++ = regs->r6;
+ *args++ = regs->r7;
+ *args++ = regs->r8;
+ *args = regs->r9;
}
static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs, unsigned int i, unsigned int n,
- const unsigned long *args)
+ struct pt_regs *regs, const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- switch (i) {
- case 0:
- if (!n--)
- break;
- regs->r4 = *args++;
- case 1:
- if (!n--)
- break;
- regs->r5 = *args++;
- case 2:
- if (!n--)
- break;
- regs->r6 = *args++;
- case 3:
- if (!n--)
- break;
- regs->r7 = *args++;
- case 4:
- if (!n--)
- break;
- regs->r8 = *args++;
- case 5:
- if (!n--)
- break;
- regs->r9 = *args++;
- case 6:
- if (!n)
- break;
- default:
- BUG();
- }
+ regs->r4 = *args++;
+ regs->r5 = *args++;
+ regs->r6 = *args++;
+ regs->r7 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
}
#endif
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 5a73e2956ac4..22aa97136c01 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -20,6 +20,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index 2db9f1cf0694..b4ff07c1baed 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -56,20 +56,16 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
static inline void
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args)
+ unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->gpr[3], 6 * sizeof(args[0]));
}
static inline void
syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, const unsigned long *args)
+ const unsigned long *args)
{
- BUG_ON(i + n > 6);
-
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 6f49e77d82a2..9bcd0c903dbb 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 2a27b275ab09..9ff033d261ab 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -22,13 +22,14 @@ unsigned long profile_pc(struct pt_regs *);
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
- return regs->gr[20];
+ return regs->gr[28];
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->iaoq[0] = val;
+ regs->iaoq[0] = val;
+ regs->iaoq[1] = val + 4;
}
/* Query offset/name of register from its name/offset */
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 8bff1a58c97f..62a6d477fae0 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk,
}
static inline void syscall_get_arguments(struct task_struct *tsk,
- struct pt_regs *regs, unsigned int i,
- unsigned int n, unsigned long *args)
+ struct pt_regs *regs,
+ unsigned long *args)
{
- BUG_ON(i);
-
- switch (n) {
- case 6:
- args[5] = regs->gr[21];
- case 5:
- args[4] = regs->gr[22];
- case 4:
- args[3] = regs->gr[23];
- case 3:
- args[2] = regs->gr[24];
- case 2:
- args[1] = regs->gr[25];
- case 1:
- args[0] = regs->gr[26];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->gr[21];
+ args[4] = regs->gr[22];
+ args[3] = regs->gr[23];
+ args[2] = regs->gr[24];
+ args[1] = regs->gr[25];
+ args[0] = regs->gr[26];
}
static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 22fdbd08cdc8..2bd5b392277c 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -1,3 +1,2 @@
generated-y += unistd_32.h
generated-y += unistd_64.h
-generic-y += kvm_para.h
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index eb39e7e380d7..841db71958cd 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -210,12 +210,6 @@ void __cpuidle arch_cpu_idle(void)
static int __init parisc_idle_init(void)
{
- const char *marker;
-
- /* check QEMU/SeaBIOS marker in PAGE0 */
- marker = (char *) &PAGE0->pad0;
- running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
-
if (!running_on_qemu)
cpu_idle_poll_ctrl(1);
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 15dd9e21be7e..d908058d05c1 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -397,6 +397,9 @@ void __init start_parisc(void)
int ret, cpunum;
struct pdc_coproc_cfg coproc_cfg;
+ /* check QEMU/SeaBIOS marker in PAGE0 */
+ running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0);
+
cpunum = smp_processor_id();
init_cpu_topology();
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ece4dc89c90b..0fe8c1e46bbc 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void)
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
int hw_breakpoint_handler(struct die_args *args);
+extern int set_dawr(struct arch_hw_breakpoint *brk);
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+ return dawr_force_enable;
+}
+
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
static inline void thread_change_pc(struct task_struct *tsk,
struct pt_regs *regs) { }
+static inline bool dawr_enabled(void) { return false; }
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* __KERNEL__ */
#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e6b5bb012ccb..013c76a0a03e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
struct kref kref;
};
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct list_head iommu_tables;
+ struct mutex alloc_lock;
struct page *pages[0];
};
@@ -222,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops;
struct kvmppc_xive;
struct kvmppc_xive_vcpu;
extern struct kvm_device_ops kvm_xive_ops;
+extern struct kvm_device_ops kvm_xive_native_ops;
struct kvmppc_passthru_irqmap;
@@ -312,7 +316,11 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
- struct kvmppc_xive *xive;
+ struct kvmppc_xive *xive; /* Current XIVE device in use */
+ struct {
+ struct kvmppc_xive *native;
+ struct kvmppc_xive *xics_on_xive;
+ } xive_devices;
struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
@@ -449,6 +457,7 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */
#define MMIO_HPTE_CACHE_SIZE 4
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ac22b28ae78d..bc892380e6cd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -197,10 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS)
-extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap);
-extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
- unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
@@ -273,6 +269,7 @@ union kvmppc_one_reg {
u64 addr;
u64 length;
} vpaval;
+ u64 xive_timaval[2];
};
struct kvmppc_ops {
@@ -480,6 +477,9 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested);
+
#else
static inline void __init kvm_cma_reserve(void)
{}
@@ -594,6 +594,22 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_native_init_module(void);
+extern void kvmppc_xive_native_exit_module(void);
+extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
@@ -617,6 +633,21 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+ { return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_native_init_module(void) { }
+static inline void kvmppc_xive_native_exit_module(void) { }
+static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return 0; }
+static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return -ENOENT; }
+
#endif /* CONFIG_KVM_XIVE */
#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
@@ -665,6 +696,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src);
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
unsigned long slb_v, unsigned int status, bool data);
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..e1d118ac61dc 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -186,8 +186,8 @@
#define OPAL_XIVE_FREE_IRQ 140
#define OPAL_XIVE_SYNC 141
#define OPAL_XIVE_DUMP 142
-#define OPAL_XIVE_RESERVED3 143
-#define OPAL_XIVE_RESERVED4 144
+#define OPAL_XIVE_GET_QUEUE_STATE 143
+#define OPAL_XIVE_SET_QUEUE_STATE 144
#define OPAL_SIGNAL_SYSTEM_RESET 145
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
@@ -210,7 +210,8 @@
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
#define OPAL_NX_COPROC_INIT 167
-#define OPAL_LAST 167
+#define OPAL_XIVE_GET_VP_STATE 170
+#define OPAL_LAST 170
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a55b01c90bb1..4e978d4dea5c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -279,6 +279,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+ __be32 *out_qtoggle,
+ __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+ uint32_t qtoggle,
+ uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
uint64_t desc, uint16_t pe_number);
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index c5698a523bb1..23f7ed796f38 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -302,6 +302,7 @@
/* Misc instructions for BPF compiler */
#define PPC_INST_LBZ 0x88000000
#define PPC_INST_LD 0xe8000000
+#define PPC_INST_LDX 0x7c00002a
#define PPC_INST_LHZ 0xa0000000
#define PPC_INST_LWZ 0x80000000
#define PPC_INST_LHBRX 0x7c00062c
@@ -309,6 +310,7 @@
#define PPC_INST_STB 0x98000000
#define PPC_INST_STH 0xb0000000
#define PPC_INST_STD 0xf8000000
+#define PPC_INST_STDX 0x7c00012a
#define PPC_INST_STDU 0xf8000001
#define PPC_INST_STW 0x90000000
#define PPC_INST_STWU 0x94000000
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 1a0e7a8b1c81..1243045bad2d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long val, mask = -1UL;
-
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_32BIT))
mask = 0xffffffff;
#endif
while (n--) {
- if (n == 0 && i == 0)
+ if (n == 0)
val = regs->orig_gpr3;
else
- val = regs->gpr[3 + i + n];
+ val = regs->gpr[3 + n];
args[n] = val & mask;
}
@@ -88,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
/* Also copy the first argument into orig_gpr3 */
- if (i == 0 && n > 0)
- regs->orig_gpr3 = args[0];
+ regs->orig_gpr3 = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3c704f5dd3ae..eaf76f57023a 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -23,6 +23,7 @@
* same offset regardless of where the code is executing
*/
extern void __iomem *xive_tima;
+extern unsigned long xive_tima_os;
/*
* Offset in the TM area of our current execution level (provided by
@@ -73,6 +74,8 @@ struct xive_q {
u32 esc_irq;
atomic_t count;
atomic_t pending_count;
+ u64 guest_qaddr;
+ u32 guest_qshift;
};
/* Global enable flags for the XIVE support */
@@ -109,12 +112,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
+extern void xive_native_sync_queue(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);
+extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags);
+
+extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+ u32 *qindex);
+extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+ u32 qindex);
+extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+
#else
static inline bool xive_enabled(void) { return false; }
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 26ca425f4c2c..b0f72dea8b11 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
@@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char {
#define KVM_XICS_PRESENTED (1ULL << 43)
#define KVM_XICS_QUEUED (1ULL << 44)
+/* POWER9 XIVE Native Interrupt Controller */
+#define KVM_DEV_XIVE_GRP_CTRL 1
+#define KVM_DEV_XIVE_RESET 1
+#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */
+
+/* Layout of 64-bit XIVE source attribute values */
+#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
+#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1)
+
+/* Layout of 64-bit XIVE source configuration attribute values */
+#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0
+#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7
+#define KVM_XIVE_SOURCE_SERVER_SHIFT 3
+#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL
+#define KVM_XIVE_SOURCE_MASKED_SHIFT 32
+#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL
+#define KVM_XIVE_SOURCE_EISN_SHIFT 33
+#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL
+
+/* Layout of 64-bit EQ identifier */
+#define KVM_XIVE_EQ_PRIORITY_SHIFT 0
+#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
+#define KVM_XIVE_EQ_SERVER_SHIFT 3
+#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL
+
+/* Layout of EQ configuration values (64 bytes) */
+struct kvm_ppc_xive_eq {
+ __u32 flags;
+ __u32 qshift;
+ __u64 qaddr;
+ __u32 qtoggle;
+ __u32 qindex;
+ __u8 pad[40];
+};
+
+#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001
+
+#define KVM_XIVE_TIMA_PAGE_OFFSET 0
+#define KVM_XIVE_ESB_PAGE_OFFSET 4
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a6773119..da307dd93ee3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,11 +29,15 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/hvcall.h>
#include <linux/uaccess.h>
/*
@@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (!ppc_breakpoint_available())
return -ENODEV;
length_max = 8; /* DABR */
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
if ((attr->bp_addr >> 9) !=
@@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ size_t rc;
+
+ /* Send error to user if they hypervisor won't allow us to write DAWR */
+ if ((!dawr_force_enable) &&
+ (firmware_has_feature(FW_FEATURE_LPAR)) &&
+ (set_dawr(&null_brk) != H_SUCCESS))
+ return -1;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ dawr_force_enable = false;
+
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ dawr_force_enable = false;
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ powerpc_debugfs_root,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 683b5b3805bd..cd381e2291df 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kmemleak.h>
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
@@ -712,6 +713,12 @@ static void kvm_use_magic_page(void)
static __init void kvm_free_tmp(void)
{
+ /*
+ * Inform kmemleak about the hole in the .bss section since the
+ * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
+ */
+ kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
+ ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
free_reserved_area(&kvm_tmp[kvm_tmp_index],
&kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd9e0d5386ee..225705aac814 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -67,6 +67,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
#include <asm/stacktrace.h>
+#include <asm/hw_breakpoint.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -784,7 +785,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
return __set_dabr(dabr, dabrx);
}
-static inline int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(struct arch_hw_breakpoint *brk)
{
unsigned long dawr, dawrx, mrd;
@@ -816,7 +817,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
// Power8 or later
set_dawr(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -830,8 +831,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
- if (cpu_has_feature(CPU_FTR_DAWR))
- return true; /* POWER8 DAWR */
+ if (dawr_enabled())
+ return true; /* POWER8 DAWR or POWER9 forced DAWR */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return false; /* POWER9 with DAWR disabled */
/* DABR: Everything but POWER8 and POWER9 */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d9ac7d94656e..684b0b315c32 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -43,6 +43,7 @@
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
#include <asm/debug.h>
+#include <asm/hw_breakpoint.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request,
dbginfo.sizeof_condition = 0;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
#else
dbginfo.features = 0;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3223aec88b2c..4c67cc79de7c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -94,7 +94,7 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
-kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 10c5579d20ce..61a212d0daf0 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_get_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
*val = get_reg_val(id, vcpu->arch.fscr);
break;
@@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_set_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
vcpu->arch.fscr = set_reg_val(id, *val);
break;
@@ -891,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvmppc_rtas_tokens_free(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif
+
+#ifdef CONFIG_KVM_XICS
+ /*
+ * Free the XIVE devices which are not directly freed by the
+ * device 'release' method
+ */
+ kfree(kvm->arch.xive_devices.native);
+ kvm->arch.xive_devices.native = NULL;
+ kfree(kvm->arch.xive_devices.xics_on_xive);
+ kvm->arch.xive_devices.xics_on_xive = NULL;
+#endif /* CONFIG_KVM_XICS */
}
int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
@@ -1050,6 +1085,9 @@ static int kvmppc_book3s_init(void)
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+ kvmppc_xive_native_init_module();
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,8 +1098,10 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_XICS
- if (xics_on_xive())
+ if (xics_on_xive()) {
kvmppc_xive_exit_module();
+ kvmppc_xive_native_exit_module();
+ }
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f02b04973710..66270e07449a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
- __free_page(stt->pages[i]);
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
kfree(stt);
}
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+ unsigned long sttpage)
+{
+ struct page *page = stt->pages[sttpage];
+
+ if (page)
+ return page;
+
+ mutex_lock(&stt->alloc_lock);
+ page = stt->pages[sttpage];
+ if (!page) {
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ WARN_ON_ONCE(!page);
+ if (page)
+ stt->pages[sttpage] = page;
+ }
+ mutex_unlock(&stt->alloc_lock);
+
+ return page;
+}
+
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
- page = stt->pages[vmf->pgoff];
+ page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+ if (!page)
+ return VM_FAULT_OOM;
+
get_page(page);
vmf->page = page;
return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
- int i;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
+ mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables);
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
mutex_lock(&kvm->lock);
/* Check this LIOBN hasn't been previously allocated */
@@ -352,17 +371,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
return ret;
- fail:
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
kfree(stt);
fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
return ret;
}
+static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
+ unsigned long *ua)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ return 0;
+}
+
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{
@@ -378,7 +408,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_TOO_HARD;
- if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
return H_TOO_HARD;
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -397,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+ unsigned long sttpage;
+
+ idx -= stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ page = stt->pages[sttpage];
+
+ if (!page) {
+ /* We allow any TCE, not just with read|write permissions */
+ if (!tce)
+ return;
+
+ page = kvm_spapr_get_tce_page(stt, sttpage);
+ if (!page)
+ return;
+ }
+ tbl = page_to_virt(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry)
{
@@ -543,15 +603,15 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
- return ret;
+ goto unlock_exit;
dir = iommu_tce_direction(tce);
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
- if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
+ if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
goto unlock_exit;
}
@@ -612,7 +672,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -647,7 +707,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
tce = be64_to_cpu(tce);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 2206bc729b9a..484b47fa3960 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -66,8 +66,6 @@
#endif
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-
/*
* Finds a TCE table descriptor by LIOBN.
*
@@ -88,6 +86,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce,
+ unsigned long *ua, unsigned long **prmap)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ if (prmap)
+ *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+
+ return 0;
+}
+
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
@@ -111,7 +128,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
- if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -129,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode,
*
@@ -161,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them.
- * Called in both real and virtual modes.
- * Cannot fail so kvmppc_tce_validate must be called before it.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
+ * Cannot fail so kvmppc_rm_tce_validate must be called before it.
*/
-void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
@@ -175,35 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ /*
+ * page must not be NULL in real mode,
+ * kvmppc_rm_ioba_validate() must have taken care of this.
+ */
+ WARN_ON_ONCE_RM(!page);
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_put);
-long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap)
+/*
+ * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
+ * in real mode.
+ * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
+ * allocated or not required (when clearing a tce entry).
+ */
+static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages, bool clearing)
{
- unsigned long gfn = tce >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot;
+ unsigned long i, idx, sttpage, sttpages;
+ unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
- memslot = search_memslots(kvm_memslots(kvm), gfn);
- if (!memslot)
- return -EINVAL;
-
- *ua = __gfn_to_hva_memslot(memslot, gfn) |
- (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+ if (ret)
+ return ret;
+ /*
+ * clearing==true says kvmppc_rm_tce_put won't be allocating pages
+ * for empty tces.
+ */
+ if (clearing)
+ return H_SUCCESS;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (prmap)
- *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-#endif
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
+ TCES_PER_PAGE;
+ for (i = sttpage; i < sttpage + sttpages; ++i)
+ if (!stt->pages[i])
+ return H_TOO_HARD;
- return 0;
+ return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
@@ -381,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, 1);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
if (ret != H_SUCCESS)
return ret;
@@ -390,7 +415,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
return ret;
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -409,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
}
}
- kvmppc_tce_put(stt, entry, tce);
+ kvmppc_rm_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -480,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
if (ret != H_SUCCESS)
return ret;
@@ -492,7 +517,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
struct mm_iommu_table_group_mem_t *mem;
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -508,7 +533,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap);
@@ -542,7 +567,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -557,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
}
- kvmppc_tce_put(stt, entry + i, tce);
+ kvmppc_rm_tce_put(stt, entry + i, tce);
}
unlock_exit:
@@ -583,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
if (ret != H_SUCCESS)
return ret;
@@ -610,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
}
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+ kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
}
@@ -635,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ if (!page) {
+ vcpu->arch.regs.gpr[4] = 0;
+ return H_SUCCESS;
+ }
tbl = (u64 *)page_address(page);
vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 06964350b97a..d5fc624e0655 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>
+#include <asm/hw_breakpoint.h>
#include "book3s.h"
@@ -749,7 +750,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
- * smb_wmb() in kvmppc_guest_entry_inject().
+ * smp_wmb() in kvmppc_guest_entry_inject().
*/
smp_rmb();
vc = vcpu->arch.vcore;
@@ -801,6 +802,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
}
}
+/* Copy guest memory in place - must reside within a single memslot */
+static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
+ unsigned long len)
+{
+ struct kvm_memory_slot *to_memslot = NULL;
+ struct kvm_memory_slot *from_memslot = NULL;
+ unsigned long to_addr, from_addr;
+ int r;
+
+ /* Get HPA for from address */
+ from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
+ if (!from_memslot)
+ return -EFAULT;
+ if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
+ if (kvm_is_error_hva(from_addr))
+ return -EFAULT;
+ from_addr |= (from & (PAGE_SIZE - 1));
+
+ /* Get HPA for to address */
+ to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
+ if (!to_memslot)
+ return -EFAULT;
+ if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
+ if (kvm_is_error_hva(to_addr))
+ return -EFAULT;
+ to_addr |= (to & (PAGE_SIZE - 1));
+
+ /* Perform copy */
+ r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
+ len);
+ if (r)
+ return -EFAULT;
+ mark_page_dirty(kvm, to >> PAGE_SHIFT);
+ return 0;
+}
+
+static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src)
+{
+ u64 pg_sz = SZ_4K; /* 4K page size */
+ u64 pg_mask = SZ_4K - 1;
+ int ret;
+
+ /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
+ if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+ H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+ return H_PARAMETER;
+
+ /* dest (and src if copy_page flag set) must be page aligned */
+ if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
+ return H_PARAMETER;
+
+ /* zero and/or copy the page as determined by the flags */
+ if (flags & H_COPY_PAGE) {
+ ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ } else if (flags & H_ZERO_PAGE) {
+ ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ }
+
+ /* We can ignore the remaining flags */
+
+ return H_SUCCESS;
+}
+
static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
struct kvmppc_vcore *vcore = target->arch.vcore;
@@ -1003,6 +1078,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
+ case H_PAGE_INIT:
+ ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
default:
return RESUME_HOST;
}
@@ -1047,6 +1127,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_IPOLL:
case H_XIRR_X:
#endif
+ case H_PAGE_INIT:
return 1;
}
@@ -2504,37 +2585,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
}
}
-static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
- struct kvm_nested_guest *nested)
-{
- cpumask_t *need_tlb_flush;
- int lpid;
-
- if (!cpu_has_feature(CPU_FTR_HVMODE))
- return;
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu &= ~0x3UL;
-
- if (nested) {
- lpid = nested->shadow_lpid;
- need_tlb_flush = &nested->need_tlb_flush;
- } else {
- lpid = kvm->arch.lpid;
- need_tlb_flush = &kvm->arch.need_tlb_flush;
- }
-
- mtspr(SPRN_LPID, lpid);
- isync();
- smp_mb();
-
- if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
- radix__local_flush_tlb_lpid_guest(lpid);
- /* Clear the bit after the TLB flush */
- cpumask_clear_cpu(pcpu, need_tlb_flush);
- }
-}
-
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
@@ -3228,19 +3278,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
for (sub = 0; sub < core_info.n_subcores; ++sub)
spin_unlock(&core_info.vc[sub]->lock);
- if (kvm_is_radix(vc->kvm)) {
- /*
- * Do we need to flush the process scoped TLB for the LPAR?
- *
- * On POWER9, individual threads can come in here, but the
- * TLB is shared between the 4 threads in a core, hence
- * invalidating on one thread invalidates for all.
- * Thus we make all 4 threads use the same bit here.
- *
- * Hash must be flushed in realmode in order to use tlbiel.
- */
- kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL);
- }
+ guest_enter_irqoff();
+
+ srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+
+ this_cpu_disable_ftrace();
/*
* Interrupts will be enabled once we get into the guest,
@@ -3248,19 +3290,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
trace_hardirqs_on();
- guest_enter_irqoff();
-
- srcu_idx = srcu_read_lock(&vc->kvm->srcu);
-
- this_cpu_disable_ftrace();
-
trap = __kvmppc_vcore_entry();
+ trace_hardirqs_off();
+
this_cpu_enable_ftrace();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
- trace_hardirqs_off();
set_irq_happened(trap);
spin_lock(&vc->lock);
@@ -3374,7 +3411,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
mtspr(SPRN_DAWR, vcpu->arch.dawr);
mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
}
@@ -3423,7 +3460,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
- mtspr(SPRN_PSSCR, host_psscr);
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR, host_dawr);
@@ -3511,6 +3550,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#ifdef CONFIG_ALTIVEC
load_vr_state(&vcpu->arch.vr);
#endif
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
mtspr(SPRN_DSCR, vcpu->arch.dscr);
mtspr(SPRN_IAMR, vcpu->arch.iamr);
@@ -3602,6 +3642,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#ifdef CONFIG_ALTIVEC
store_vr_state(&vcpu->arch.vr);
#endif
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -3967,7 +4008,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
unsigned long lpcr)
{
int trap, r, pcpu;
- int srcu_idx;
+ int srcu_idx, lpid;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4043,8 +4084,12 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+ mtspr(SPRN_LPID, lpid);
+ isync();
+ kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
+ }
trace_hardirqs_on();
guest_enter_irqoff();
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index b0cf22477e87..6035d24f1d1d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -805,3 +805,60 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
vcpu->arch.doorbell_request = 0;
}
}
+
+static void flush_guest_tlb(struct kvm *kvm)
+{
+ unsigned long rb, set;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ if (kvm_is_radix(kvm)) {
+ /* R=1 PRS=1 RIC=2 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (2),
+ "r" (0) : "memory");
+ for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+ rb += PPC_BIT(51); /* increment set number */
+ /* R=1 PRS=1 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (0),
+ "r" (0) : "memory");
+ }
+ } else {
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+ /* R=0 PRS=0 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (0), "i" (0), "i" (0),
+ "r" (0) : "memory");
+ rb += PPC_BIT(51); /* increment set number */
+ }
+ }
+ asm volatile("ptesync": : :"memory");
+}
+
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested)
+{
+ cpumask_t *need_tlb_flush;
+
+ /*
+ * On POWER9, individual threads can come in here, but the
+ * TLB is shared between the 4 threads in a core, hence
+ * invalidating on one thread invalidates for all.
+ * Thus we make all 4 threads use the same bit.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pcpu = cpu_first_thread_sibling(pcpu);
+
+ if (nested)
+ need_tlb_flush = &nested->need_tlb_flush;
+ else
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+ if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+ flush_guest_tlb(kvm);
+
+ /* Clear the bit after the TLB flush */
+ cpumask_clear_cpu(pcpu, need_tlb_flush);
+ }
+}
+EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 3b3791ed74a6..8431ad1e8391 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -13,6 +13,7 @@
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
+#include <linux/sizes.h>
#include <asm/trace.h>
#include <asm/kvm_ppc.h>
@@ -867,6 +868,149 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
return ret;
}
+static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa,
+ int writing, unsigned long *hpa,
+ struct kvm_memory_slot **memslot_p)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
+ unsigned int shift;
+ pte_t *ptep, pte;
+
+ /* Find the memslot for this address */
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ return H_PARAMETER;
+
+ /* Translate to host virtual address */
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+
+ /* Try to find the host pte for that virtual address */
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ if (!ptep)
+ return H_TOO_HARD;
+ pte = kvmppc_read_update_linux_pte(ptep, writing);
+ if (!pte_present(pte))
+ return H_TOO_HARD;
+
+ /* Convert to a physical address */
+ if (shift)
+ psize = 1UL << shift;
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ pa |= hva & (psize - 1);
+ pa |= gpa & ~PAGE_MASK;
+
+ if (hpa)
+ *hpa = pa;
+ if (memslot_p)
+ *memslot_p = memslot;
+
+ return H_SUCCESS;
+}
+
+static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
+ unsigned long dest)
+{
+ struct kvm_memory_slot *memslot;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long pa, mmu_seq;
+ long ret = H_SUCCESS;
+ int i;
+
+ /* Used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check if we've been invalidated */
+ raw_spin_lock(&kvm->mmu_lock.rlock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = H_TOO_HARD;
+ goto out_unlock;
+ }
+
+ /* Zero the page */
+ for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
+ dcbz((void *)pa);
+ kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
+
+out_unlock:
+ raw_spin_unlock(&kvm->mmu_lock.rlock);
+ return ret;
+}
+
+static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
+ unsigned long dest, unsigned long src)
+{
+ unsigned long dest_pa, src_pa, mmu_seq;
+ struct kvm_memory_slot *dest_memslot;
+ struct kvm *kvm = vcpu->kvm;
+ long ret = H_SUCCESS;
+
+ /* Used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot);
+ if (ret != H_SUCCESS)
+ return ret;
+ ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check if we've been invalidated */
+ raw_spin_lock(&kvm->mmu_lock.rlock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = H_TOO_HARD;
+ goto out_unlock;
+ }
+
+ /* Copy the page */
+ memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);
+
+ kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
+
+out_unlock:
+ raw_spin_unlock(&kvm->mmu_lock.rlock);
+ return ret;
+}
+
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 pg_mask = SZ_4K - 1; /* 4K page size */
+ long ret = H_SUCCESS;
+
+ /* Don't handle radix mode here, go up to the virtual mode handler */
+ if (kvm_is_radix(kvm))
+ return H_TOO_HARD;
+
+ /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
+ if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+ H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+ return H_PARAMETER;
+
+ /* dest (and src if copy_page flag set) must be page aligned */
+ if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
+ return H_PARAMETER;
+
+ /* zero and/or copy the page as determined by the flags */
+ if (flags & H_COPY_PAGE)
+ ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
+ else if (flags & H_ZERO_PAGE)
+ ret = kvmppc_do_h_page_init_zero(vcpu, dest);
+
+ /* We can ignore the other flags */
+
+ return ret;
+}
+
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3a5e719ef032..ad7bee97de77 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -581,11 +581,8 @@ kvmppc_hv_entry:
1:
#endif
- /* Use cr7 as an indication of radix mode */
ld r5, HSTATE_KVM_VCORE(r13)
ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
- lbz r0, KVM_RADIX(r9)
- cmpwi cr7, r0, 0
/*
* POWER7/POWER8 host -> guest partition switch code.
@@ -608,9 +605,6 @@ kvmppc_hv_entry:
cmpwi r6,0
bne 10f
- /* Radix has already switched LPID and flushed core TLB */
- bne cr7, 22f
-
lwz r7,KVM_LPID(r9)
BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
@@ -622,41 +616,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
- /* See if we need to flush the TLB. Hash has to be done in RM */
- lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
-BEGIN_FTR_SECTION
- /*
- * On POWER9, individual threads can come in here, but the
- * TLB is shared between the 4 threads in a core, hence
- * invalidating on one thread invalidates for all.
- * Thus we make all 4 threads use the same bit here.
- */
- clrrdi r6,r6,2
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- clrldi r7,r6,64-6 /* extract bit number (6 bits) */
- srdi r6,r6,6 /* doubleword number */
- sldi r6,r6,3 /* address offset */
- add r6,r6,r9
- addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
- li r8,1
- sld r8,r8,r7
- ld r7,0(r6)
- and. r7,r7,r8
- beq 22f
- /* Flush the TLB of any entries for this LPID */
- lwz r0,KVM_TLB_SETS(r9)
- mtctr r0
- li r7,0x800 /* IS field = 0b10 */
- ptesync
- li r0,0 /* RS for P9 version of tlbiel */
-28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */
- addi r7,r7,0x1000
- bdnz 28b
- ptesync
-23: ldarx r7,0,r6 /* clear the bit after TLB flushed */
- andc r7,r7,r8
- stdcx. r7,0,r6
- bne 23b
+ /* See if we need to flush the TLB. */
+ mr r3, r9 /* kvm pointer */
+ lhz r4, PACAPACAINDEX(r13) /* physical cpu number */
+ li r5, 0 /* nested vcpu pointer */
+ bl kvmppc_check_need_tlb_flush
+ nop
+ ld r5, HSTATE_KVM_VCORE(r13)
/* Add timebase offset onto timebase */
22: ld r8,VCORE_TB_OFFSET(r5)
@@ -822,18 +788,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r5
mtspr SPRN_PSPB, r6
mtspr SPRN_FSCR, r7
- ld r5, VCPU_DAWR(r4)
- ld r6, VCPU_DAWRX(r4)
- ld r7, VCPU_CIABR(r4)
- ld r8, VCPU_TAR(r4)
/*
* Handle broken DAWR case by not writing it. This means we
* can still store the DAWR register for migration.
*/
-BEGIN_FTR_SECTION
+ LOAD_REG_ADDR(r5, dawr_force_enable)
+ lbz r5, 0(r5)
+ cmpdi r5, 0
+ beq 1f
+ ld r5, VCPU_DAWR(r4)
+ ld r6, VCPU_DAWRX(r4)
mtspr SPRN_DAWR, r5
mtspr SPRN_DAWRX, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
+1:
+ ld r7, VCPU_CIABR(r4)
+ ld r8, VCPU_TAR(r4)
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
@@ -969,17 +938,27 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
- ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, 0
- beq no_xive
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
+ lwz r8, VCPU_XIVE_CAM_WORD(r4)
+ li r7, TM_QW1_OS + TM_WORD2
+ mfmsr r0
+ andi. r0, r0, MSR_DR /* in real mode? */
+ beq 2f
+ ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
+ cmpldi cr1, r10, 0
+ beq cr1, no_xive
+ eieio
+ stdx r11,r9,r10
+ stwx r8,r7,r10
+ b 3f
+2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
+ cmpldi cr1, r10, 0
+ beq cr1, no_xive
eieio
stdcix r11,r9,r10
- lwz r11, VCPU_XIVE_CAM_WORD(r4)
- li r9, TM_QW1_OS + TM_WORD2
- stwcix r11,r9,r10
- li r9, 1
+ stwcix r8,r7,r10
+3: li r9, 1
stb r9, VCPU_XIVE_PUSHED(r4)
eieio
@@ -998,12 +977,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
* on, we mask it.
*/
lbz r0, VCPU_XIVE_ESC_ON(r4)
- cmpwi r0,0
- beq 1f
- ld r10, VCPU_XIVE_ESC_RADDR(r4)
+ cmpwi cr1, r0,0
+ beq cr1, 1f
li r9, XIVE_ESB_SET_PQ_01
+ beq 4f /* in real mode? */
+ ld r10, VCPU_XIVE_ESC_VADDR(r4)
+ ldx r0, r10, r9
+ b 5f
+4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
ldcix r0, r10, r9
- sync
+5: sync
/* We have a possible subtle race here: The escalation interrupt might
* have fired and be on its way to the host queue while we mask it,
@@ -2281,7 +2264,7 @@ hcall_real_table:
#endif
.long 0 /* 0x24 - H_SET_SPRG0 */
.long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
- .long 0 /* 0x2c */
+ .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
.long 0 /* 0x30 */
.long 0 /* 0x34 */
.long 0 /* 0x38 */
@@ -2513,11 +2496,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
blr
2:
-BEGIN_FTR_SECTION
- /* POWER9 with disabled DAWR */
+ LOAD_REG_ADDR(r11, dawr_force_enable)
+ lbz r11, 0(r11)
+ cmpdi r11, 0
li r3, H_HARDWARE
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
+ beqlr
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f78d002f0fe0..4953957333b7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
return IRQ_HANDLED;
}
-static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+ bool single_escalation)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct xive_q *q = &xc->queues[prio];
@@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO;
}
- if (xc->xive->single_escalation)
+ if (single_escalation)
name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num);
else
@@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
* interrupt, thus leaving it effectively masked after
* it fires once.
*/
- if (xc->xive->single_escalation) {
+ if (single_escalation) {
struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
continue;
rc = xive_provision_queue(vcpu, prio);
if (rc == 0 && !xive->single_escalation)
- xive_attach_escalation(vcpu, prio);
+ kvmppc_xive_attach_escalation(vcpu, prio,
+ xive->single_escalation);
if (rc)
return rc;
}
@@ -342,7 +344,7 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
}
-static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
{
struct kvm_vcpu *vcpu;
int i, rc;
@@ -380,11 +382,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
return -EBUSY;
}
-static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
-{
- return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
-}
-
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
@@ -430,8 +427,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive_vp(xive, state->act_server),
- MASKED, state->number);
+ kvmppc_xive_vp(xive, state->act_server),
+ MASKED, state->number);
/* set old_p so we can track if an H_EOI was done */
state->old_p = true;
state->old_q = false;
@@ -486,8 +483,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive_vp(xive, state->act_server),
- state->act_priority, state->number);
+ kvmppc_xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
/* If an EOI is needed, do it here */
if (!state->old_p)
xive_vm_source_eoi(hw_num, xd);
@@ -535,7 +532,7 @@ static int xive_target_interrupt(struct kvm *kvm,
* priority. The count for that new target will have
* already been incremented.
*/
- rc = xive_select_target(kvm, &server, prio);
+ rc = kvmppc_xive_select_target(kvm, &server, prio);
/*
* We failed to find a target ? Not much we can do
@@ -563,7 +560,7 @@ static int xive_target_interrupt(struct kvm *kvm,
kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num,
- xive_vp(xive, server),
+ kvmppc_xive_vp(xive, server),
prio, state->number);
}
@@ -849,7 +846,8 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
/*
* We can't update the state of a "pushed" VCPU, but that
- * shouldn't happen.
+ * shouldn't happen because the vcpu->mutex makes running a
+ * vcpu mutually exclusive with doing one_reg get/set on it.
*/
if (WARN_ON(vcpu->arch.xive_pushed))
return -EIO;
@@ -940,6 +938,13 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Turn the IPI hard off */
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ /*
+ * Reset ESB guest mapping. Needed when ESB pages are exposed
+ * to the guest in XIVE native mode
+ */
+ if (xive->ops && xive->ops->reset_mapped)
+ xive->ops->reset_mapped(kvm, guest_irq);
+
/* Grab info about irq */
state->pt_number = hw_irq;
state->pt_data = irq_data_get_irq_handler_data(host_data);
@@ -951,7 +956,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
* which is fine for a never started interrupt.
*/
xive_native_configure_irq(hw_irq,
- xive_vp(xive, state->act_server),
+ kvmppc_xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -1025,9 +1030,17 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
state->pt_number = 0;
state->pt_data = NULL;
+ /*
+ * Reset ESB guest mapping. Needed when ESB pages are exposed
+ * to the guest in XIVE native mode
+ */
+ if (xive->ops && xive->ops->reset_mapped) {
+ xive->ops->reset_mapped(kvm, guest_irq);
+ }
+
/* Reconfigure the IPI */
xive_native_configure_irq(state->ipi_number,
- xive_vp(xive, state->act_server),
+ kvmppc_xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -1049,7 +1062,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
}
EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
-static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvm *kvm = vcpu->kvm;
@@ -1083,14 +1096,35 @@ static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
arch_spin_unlock(&sb->lock);
}
}
+
+ /* Disable vcpu's escalation interrupt */
+ if (vcpu->arch.xive_esc_on) {
+ __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+ XIVE_ESB_SET_PQ_01));
+ vcpu->arch.xive_esc_on = false;
+ }
+
+ /*
+ * Clear pointers to escalation interrupt ESB.
+ * This is safe because the vcpu->mutex is held, preventing
+ * any other CPU from concurrently executing a KVM_RUN ioctl.
+ */
+ vcpu->arch.xive_esc_vaddr = 0;
+ vcpu->arch.xive_esc_raddr = 0;
}
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
- struct kvmppc_xive *xive = xc->xive;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
int i;
+ if (!kvmppc_xics_enabled(vcpu))
+ return;
+
+ if (!xc)
+ return;
+
pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
/* Ensure no interrupt is still routed to that VP */
@@ -1129,6 +1163,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
}
/* Free the VP */
kfree(xc);
+
+ /* Cleanup the vcpu */
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+ vcpu->arch.xive_vcpu = NULL;
}
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
@@ -1146,7 +1184,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
}
if (xive->kvm != vcpu->kvm)
return -EPERM;
- if (vcpu->arch.irq_type)
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
pr_devel("Duplicate !\n");
@@ -1166,7 +1204,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = xive_vp(xive, cpu);
+ xc->vp_id = kvmppc_xive_vp(xive, cpu);
xc->mfrr = 0xff;
xc->valid = true;
@@ -1219,7 +1257,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
if (r == 0 && !xive->single_escalation)
- xive_attach_escalation(vcpu, i);
+ kvmppc_xive_attach_escalation(
+ vcpu, i, xive->single_escalation);
if (r)
goto bail;
} else {
@@ -1234,7 +1273,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
}
/* If not done above, attach priority 0 escalation */
- r = xive_attach_escalation(vcpu, 0);
+ r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
if (r)
goto bail;
@@ -1485,8 +1524,8 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
return 0;
}
-static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
- int irq)
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+ struct kvmppc_xive *xive, int irq)
{
struct kvm *kvm = xive->kvm;
struct kvmppc_xive_src_block *sb;
@@ -1509,6 +1548,7 @@ static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *x
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+ sb->irq_state[i].eisn = 0;
sb->irq_state[i].guest_priority = MASKED;
sb->irq_state[i].saved_priority = MASKED;
sb->irq_state[i].act_priority = MASKED;
@@ -1565,7 +1605,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb) {
pr_devel("No source, creating source block...\n");
- sb = xive_create_src_block(xive, irq);
+ sb = kvmppc_xive_create_src_block(xive, irq);
if (!sb) {
pr_devel("Failed to create block...\n");
return -ENOMEM;
@@ -1789,7 +1829,7 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
xive_cleanup_irq_data(xd);
}
-static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
{
int i;
@@ -1810,16 +1850,55 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
}
}
-static void kvmppc_xive_free(struct kvm_device *dev)
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xive_release(struct kvm_device *dev)
{
struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
int i;
+ int was_ready;
+
+ pr_devel("Releasing xive device\n");
debugfs_remove(xive->dentry);
- if (kvm)
- kvm->arch.xive = NULL;
+ /*
+ * Clearing mmu_ready temporarily while holding kvm->lock
+ * is a way of ensuring that no vcpus can enter the guest
+ * until we drop kvm->lock. Doing kick_all_cpus_sync()
+ * ensures that any vcpu executing inside the guest has
+ * exited the guest. Once kick_all_cpus_sync() has finished,
+ * we know that no vcpu can be executing the XIVE push or
+ * pull code, or executing a XICS hcall.
+ *
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd referring to the
+ * device. Therefore there can not be any of the device
+ * attribute set/get functions being executed concurrently,
+ * and similarly, the connect_vcpu and set/clr_mapped
+ * functions also cannot be being executed.
+ */
+ was_ready = kvm->arch.mmu_ready;
+ kvm->arch.mmu_ready = 0;
+ kick_all_cpus_sync();
+
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xive_cleanup_vcpu(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+ kvm->arch.xive = NULL;
/* Mask and free interrupts */
for (i = 0; i <= xive->max_sbid; i++) {
@@ -1832,11 +1911,47 @@ static void kvmppc_xive_free(struct kvm_device *dev)
if (xive->vp_base != XIVE_INVALID_VP)
xive_native_free_vp_block(xive->vp_base);
+ kvm->arch.mmu_ready = was_ready;
+
+ /*
+ * A reference of the kvmppc_xive pointer is now kept under
+ * the xive_devices struct of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
- kfree(xive);
kfree(dev);
}
+/*
+ * When the guest chooses the interrupt mode (XICS legacy or XIVE
+ * native), the VM will switch of KVM device. The previous device will
+ * be "released" before the new one is created.
+ *
+ * Until we are sure all execution paths are well protected, provide a
+ * fail safe (transitional) method for device destruction, in which
+ * the XIVE device pointer is recycled and not directly freed.
+ */
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
+{
+ struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
+ &kvm->arch.xive_devices.native :
+ &kvm->arch.xive_devices.xics_on_xive;
+ struct kvmppc_xive *xive = *kvm_xive_device;
+
+ if (!xive) {
+ xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+ *kvm_xive_device = xive;
+ } else {
+ memset(xive, 0, sizeof(*xive));
+ }
+
+ return xive;
+}
+
+/*
+ * Create a XICS device with XIVE backend. kvm->lock is held.
+ */
static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
@@ -1845,7 +1960,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
pr_devel("Creating xive for partition\n");
- xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+ xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
@@ -1883,6 +1998,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int i;
+
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+ u32 i0, i1, idx;
+
+ if (!q->qpage && !xc->esc_virq[i])
+ continue;
+
+ seq_printf(m, " [q%d]: ", i);
+
+ if (q->qpage) {
+ idx = q->idx;
+ i0 = be32_to_cpup(q->qpage + idx);
+ idx = (idx + 1) & q->msk;
+ i1 = be32_to_cpup(q->qpage + idx);
+ seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+ i0, i1);
+ }
+ if (xc->esc_virq[i]) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+ struct xive_irq_data *xd =
+ irq_data_get_irq_handler_data(d);
+ u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+ seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+ (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+ xc->esc_virq[i], pq, xd->eoi_page);
+ seq_puts(m, "\n");
+ }
+ }
+ return 0;
+}
static int xive_debug_show(struct seq_file *m, void *private)
{
@@ -1908,7 +2060,6 @@ static int xive_debug_show(struct seq_file *m, void *private)
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
- unsigned int i;
if (!xc)
continue;
@@ -1918,33 +2069,8 @@ static int xive_debug_show(struct seq_file *m, void *private)
xc->server_num, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
- for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
- u32 i0, i1, idx;
-
- if (!q->qpage && !xc->esc_virq[i])
- continue;
- seq_printf(m, " [q%d]: ", i);
-
- if (q->qpage) {
- idx = q->idx;
- i0 = be32_to_cpup(q->qpage + idx);
- idx = (idx + 1) & q->msk;
- i1 = be32_to_cpup(q->qpage + idx);
- seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
- }
- if (xc->esc_virq[i]) {
- struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
- seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
- (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
- (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
- xc->esc_virq[i], pq, xd->eoi_page);
- seq_printf(m, "\n");
- }
- }
+ kvmppc_xive_debug_show_queues(m, vcpu);
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
@@ -1999,7 +2125,7 @@ struct kvm_device_ops kvm_xive_ops = {
.name = "kvm-xive",
.create = kvmppc_xive_create,
.init = kvmppc_xive_init,
- .destroy = kvmppc_xive_free,
+ .release = kvmppc_xive_release,
.set_attr = xive_set_attr,
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index a08ae6fd4c51..426146332984 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -13,6 +13,13 @@
#include "book3s_xics.h"
/*
+ * The XIVE Interrupt source numbers are within the range 0 to
+ * KVMPPC_XICS_NR_IRQS.
+ */
+#define KVMPPC_XIVE_FIRST_IRQ 0
+#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS
+
+/*
* State for one guest irq source.
*
* For each guest source we allocate a HW interrupt in the XIVE
@@ -54,6 +61,9 @@ struct kvmppc_xive_irq_state {
bool saved_p;
bool saved_q;
u8 saved_scan_prio;
+
+ /* Xive native */
+ u32 eisn; /* Guest Effective IRQ number */
};
/* Select the "right" interrupt (IPI vs. passthrough) */
@@ -84,6 +94,11 @@ struct kvmppc_xive_src_block {
struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
+struct kvmppc_xive;
+
+struct kvmppc_xive_ops {
+ int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq);
+};
struct kvmppc_xive {
struct kvm *kvm;
@@ -122,6 +137,10 @@ struct kvmppc_xive {
/* Flags */
u8 single_escalation;
+
+ struct kvmppc_xive_ops *ops;
+ struct address_space *mapping;
+ struct mutex mapping_lock;
};
#define KVMPPC_XIVE_Q_COUNT 8
@@ -198,6 +217,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
return xive->src_blocks[bid];
}
+static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+ return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
/*
* Mapping between guest priorities and host priorities
* is as follow.
@@ -248,5 +272,18 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+/*
+ * Common Xive routines for XICS-over-XIVE and XIVE native
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu);
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu);
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+ struct kvmppc_xive *xive, int irq);
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+ bool single_escalation);
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
new file mode 100644
index 000000000000..6a8e698c4b6e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -0,0 +1,1249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+ u64 val;
+
+ if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+ offset |= offset << 4;
+
+ val = in_be64(xd->eoi_mmio + offset);
+ return (u8)val;
+}
+
+static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct xive_q *q = &xc->queues[prio];
+
+ xive_native_disable_queue(xc->vp_id, q, prio);
+ if (q->qpage) {
+ put_page(virt_to_page(q->qpage));
+ q->qpage = NULL;
+ }
+}
+
+void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ int i;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return;
+
+ if (!xc)
+ return;
+
+ pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+ /* Ensure no interrupt is still routed to that VP */
+ xc->valid = false;
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Free the queues & associated interrupts */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ /* Free the escalation irq */
+ if (xc->esc_virq[i]) {
+ free_irq(xc->esc_virq[i], vcpu);
+ irq_dispose_mapping(xc->esc_virq[i]);
+ kfree(xc->esc_virq_names[i]);
+ xc->esc_virq[i] = 0;
+ }
+
+ /* Free the queue */
+ kvmppc_xive_native_cleanup_queue(vcpu, i);
+ }
+
+ /* Free the VP */
+ kfree(xc);
+
+ /* Cleanup the vcpu */
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+ vcpu->arch.xive_vcpu = NULL;
+}
+
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 server_num)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_vcpu *xc = NULL;
+ int rc;
+
+ pr_devel("native_connect_vcpu(server=%d)\n", server_num);
+
+ if (dev->ops != &kvm_xive_native_ops) {
+ pr_devel("Wrong ops !\n");
+ return -EPERM;
+ }
+ if (xive->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+ return -EBUSY;
+ if (server_num >= KVM_MAX_VCPUS) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
+ pr_devel("Duplicate !\n");
+ rc = -EEXIST;
+ goto bail;
+ }
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ rc = -ENOMEM;
+ goto bail;
+ }
+
+ vcpu->arch.xive_vcpu = xc;
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = server_num;
+
+ xc->vp_id = kvmppc_xive_vp(xive, server_num);
+ xc->valid = true;
+ vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+
+ rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+ if (rc) {
+ pr_err("Failed to get VP info from OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ if (rc) {
+ pr_err("Failed to enable VP in OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /* Configure VCPU fields for use by assembly push/pull */
+ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+ vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+ /* TODO: reset all queues to a clean state ? */
+bail:
+ mutex_unlock(&vcpu->kvm->lock);
+ if (rc)
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+ return rc;
+}
+
+/*
+ * Device passthrough support
+ */
+static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
+{
+ struct kvmppc_xive *xive = kvm->arch.xive;
+
+ if (irq >= KVMPPC_XIVE_NR_IRQS)
+ return -EINVAL;
+
+ /*
+ * Clear the ESB pages of the IRQ number being mapped (or
+ * unmapped) into the guest and let the the VM fault handler
+ * repopulate with the appropriate ESB pages (device or IC)
+ */
+ pr_debug("clearing esb pages for girq 0x%lx\n", irq);
+ mutex_lock(&xive->mapping_lock);
+ if (xive->mapping)
+ unmap_mapping_range(xive->mapping,
+ irq * (2ull << PAGE_SHIFT),
+ 2ull << PAGE_SHIFT, 1);
+ mutex_unlock(&xive->mapping_lock);
+ return 0;
+}
+
+static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
+ .reset_mapped = kvmppc_xive_native_reset_mapped,
+};
+
+static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct kvm_device *dev = vma->vm_file->private_data;
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct xive_irq_data *xd;
+ u32 hw_num;
+ u16 src;
+ u64 page;
+ unsigned long irq;
+ u64 page_offset;
+
+ /*
+ * Linux/KVM uses a two pages ESB setting, one for trigger and
+ * one for EOI
+ */
+ page_offset = vmf->pgoff - vma->vm_pgoff;
+ irq = page_offset / 2;
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb) {
+ pr_devel("%s: source %lx not found !\n", __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ state = &sb->irq_state[src];
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ arch_spin_lock(&sb->lock);
+
+ /*
+ * first/even page is for trigger
+ * second/odd page is for EOI and management.
+ */
+ page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
+ arch_spin_unlock(&sb->lock);
+
+ if (WARN_ON(!page)) {
+ pr_err("%s: accessing invalid ESB page for source %lx !\n",
+ __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct xive_native_esb_vmops = {
+ .fault = xive_native_esb_fault,
+};
+
+static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+
+ switch (vmf->pgoff - vma->vm_pgoff) {
+ case 0: /* HW - forbid access */
+ case 1: /* HV - forbid access */
+ return VM_FAULT_SIGBUS;
+ case 2: /* OS */
+ vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
+ return VM_FAULT_NOPAGE;
+ case 3: /* USER - TODO */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+static const struct vm_operations_struct xive_native_tima_vmops = {
+ .fault = xive_native_tima_fault,
+};
+
+static int kvmppc_xive_native_mmap(struct kvm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct kvmppc_xive *xive = dev->private;
+
+ /* We only allow mappings at fixed offset for now */
+ if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
+ if (vma_pages(vma) > 4)
+ return -EINVAL;
+ vma->vm_ops = &xive_native_tima_vmops;
+ } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
+ if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
+ return -EINVAL;
+ vma->vm_ops = &xive_native_esb_vmops;
+ } else {
+ return -EINVAL;
+ }
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+ /*
+ * Grab the KVM device file address_space to be able to clear
+ * the ESB pages mapping when a device is passed-through into
+ * the guest.
+ */
+ xive->mapping = vma->vm_file->f_mapping;
+ return 0;
+}
+
+static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
+ u64 addr)
+{
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u64 val;
+ u16 idx;
+ int rc;
+
+ pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+ if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
+ return -E2BIG;
+
+ sb = kvmppc_xive_find_source(xive, irq, &idx);
+ if (!sb) {
+ pr_debug("No source, creating source block...\n");
+ sb = kvmppc_xive_create_src_block(xive, irq);
+ if (!sb) {
+ pr_err("Failed to create block...\n");
+ return -ENOMEM;
+ }
+ }
+ state = &sb->irq_state[idx];
+
+ if (get_user(val, ubufp)) {
+ pr_err("fault getting user info !\n");
+ return -EFAULT;
+ }
+
+ arch_spin_lock(&sb->lock);
+
+ /*
+ * If the source doesn't already have an IPI, allocate
+ * one and get the corresponding data
+ */
+ if (!state->ipi_number) {
+ state->ipi_number = xive_native_alloc_irq();
+ if (state->ipi_number == 0) {
+ pr_err("Failed to allocate IRQ !\n");
+ rc = -ENXIO;
+ goto unlock;
+ }
+ xive_native_populate_irq_data(state->ipi_number,
+ &state->ipi_data);
+ pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
+ state->ipi_number, irq);
+ }
+
+ /* Restore LSI state */
+ if (val & KVM_XIVE_LEVEL_SENSITIVE) {
+ state->lsi = true;
+ if (val & KVM_XIVE_LEVEL_ASSERTED)
+ state->asserted = true;
+ pr_devel(" LSI ! Asserted=%d\n", state->asserted);
+ }
+
+ /* Mask IRQ to start with */
+ state->act_server = 0;
+ state->act_priority = MASKED;
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+
+ /* Increment the number of valid sources and mark this one valid */
+ if (!state->valid)
+ xive->src_count++;
+ state->valid = true;
+
+ rc = 0;
+
+unlock:
+ arch_spin_unlock(&sb->lock);
+
+ return rc;
+}
+
+static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
+ struct kvmppc_xive_src_block *sb,
+ struct kvmppc_xive_irq_state *state,
+ u32 server, u8 priority, bool masked,
+ u32 eisn)
+{
+ struct kvm *kvm = xive->kvm;
+ u32 hw_num;
+ int rc = 0;
+
+ arch_spin_lock(&sb->lock);
+
+ if (state->act_server == server && state->act_priority == priority &&
+ state->eisn == eisn)
+ goto unlock;
+
+ pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
+ priority, server, masked, state->act_server,
+ state->act_priority);
+
+ kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+ if (priority != MASKED && !masked) {
+ rc = kvmppc_xive_select_target(kvm, &server, priority);
+ if (rc)
+ goto unlock;
+
+ state->act_priority = priority;
+ state->act_server = server;
+ state->eisn = eisn;
+
+ rc = xive_native_configure_irq(hw_num,
+ kvmppc_xive_vp(xive, server),
+ priority, eisn);
+ } else {
+ state->act_priority = MASKED;
+ state->act_server = 0;
+ state->eisn = 0;
+
+ rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
+ }
+
+unlock:
+ arch_spin_unlock(&sb->lock);
+ return rc;
+}
+
+static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
+ long irq, u64 addr)
+{
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 src;
+ u64 kvm_cfg;
+ u32 server;
+ u8 priority;
+ bool masked;
+ u32 eisn;
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ return -ENOENT;
+
+ state = &sb->irq_state[src];
+
+ if (!state->valid)
+ return -EINVAL;
+
+ if (get_user(kvm_cfg, ubufp))
+ return -EFAULT;
+
+ pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
+
+ priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
+ KVM_XIVE_SOURCE_PRIORITY_SHIFT;
+ server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
+ KVM_XIVE_SOURCE_SERVER_SHIFT;
+ masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
+ KVM_XIVE_SOURCE_MASKED_SHIFT;
+ eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
+ KVM_XIVE_SOURCE_EISN_SHIFT;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("invalid priority for queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+
+ return kvmppc_xive_native_update_source_config(xive, sb, state, server,
+ priority, masked, eisn);
+}
+
+static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
+ long irq, u64 addr)
+{
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct xive_irq_data *xd;
+ u32 hw_num;
+ u16 src;
+ int rc = 0;
+
+ pr_devel("%s irq=0x%lx", __func__, irq);
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ return -ENOENT;
+
+ state = &sb->irq_state[src];
+
+ rc = -EINVAL;
+
+ arch_spin_lock(&sb->lock);
+
+ if (state->valid) {
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+ xive_native_sync_source(hw_num);
+ rc = 0;
+ }
+
+ arch_spin_unlock(&sb->lock);
+ return rc;
+}
+
+static int xive_native_validate_queue_size(u32 qshift)
+{
+ /*
+ * We only support 64K pages for the moment. This is also
+ * advertised in the DT property "ibm,xive-eq-sizes"
+ */
+ switch (qshift) {
+ case 0: /* EQ reset */
+ case 16:
+ return 0;
+ case 12:
+ case 21:
+ case 24:
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
+ long eq_idx, u64 addr)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_xive_vcpu *xc;
+ void __user *ubufp = (void __user *) addr;
+ u32 server;
+ u8 priority;
+ struct kvm_ppc_xive_eq kvm_eq;
+ int rc;
+ __be32 *qaddr = 0;
+ struct page *page;
+ struct xive_q *q;
+ gfn_t gfn;
+ unsigned long page_size;
+
+ /*
+ * Demangle priority/server tuple from the EQ identifier
+ */
+ priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+ KVM_XIVE_EQ_PRIORITY_SHIFT;
+ server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+ KVM_XIVE_EQ_SERVER_SHIFT;
+
+ if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
+ return -EFAULT;
+
+ vcpu = kvmppc_xive_find_server(kvm, server);
+ if (!vcpu) {
+ pr_err("Can't find server %d\n", server);
+ return -ENOENT;
+ }
+ xc = vcpu->arch.xive_vcpu;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("Trying to restore invalid queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+ q = &xc->queues[priority];
+
+ pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+ __func__, server, priority, kvm_eq.flags,
+ kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+ /*
+ * sPAPR specifies a "Unconditional Notify (n) flag" for the
+ * H_INT_SET_QUEUE_CONFIG hcall which forces notification
+ * without using the coalescing mechanisms provided by the
+ * XIVE END ESBs. This is required on KVM as notification
+ * using the END ESBs is not supported.
+ */
+ if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
+ pr_err("invalid flags %d\n", kvm_eq.flags);
+ return -EINVAL;
+ }
+
+ rc = xive_native_validate_queue_size(kvm_eq.qshift);
+ if (rc) {
+ pr_err("invalid queue size %d\n", kvm_eq.qshift);
+ return rc;
+ }
+
+ /* reset queue and disable queueing */
+ if (!kvm_eq.qshift) {
+ q->guest_qaddr = 0;
+ q->guest_qshift = 0;
+
+ rc = xive_native_configure_queue(xc->vp_id, q, priority,
+ NULL, 0, true);
+ if (rc) {
+ pr_err("Failed to reset queue %d for VCPU %d: %d\n",
+ priority, xc->server_num, rc);
+ return rc;
+ }
+
+ if (q->qpage) {
+ put_page(virt_to_page(q->qpage));
+ q->qpage = NULL;
+ }
+
+ return 0;
+ }
+
+ if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
+ pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
+ 1ull << kvm_eq.qshift);
+ return -EINVAL;
+ }
+
+ gfn = gpa_to_gfn(kvm_eq.qaddr);
+ page = gfn_to_page(kvm, gfn);
+ if (is_error_page(page)) {
+ pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+ return -EINVAL;
+ }
+
+ page_size = kvm_host_page_size(kvm, gfn);
+ if (1ull << kvm_eq.qshift > page_size) {
+ pr_warn("Incompatible host page size %lx!\n", page_size);
+ return -EINVAL;
+ }
+
+ qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
+
+ /*
+ * Backup the queue page guest address to the mark EQ page
+ * dirty for migration.
+ */
+ q->guest_qaddr = kvm_eq.qaddr;
+ q->guest_qshift = kvm_eq.qshift;
+
+ /*
+ * Unconditional Notification is forced by default at the
+ * OPAL level because the use of END ESBs is not supported by
+ * Linux.
+ */
+ rc = xive_native_configure_queue(xc->vp_id, q, priority,
+ (__be32 *) qaddr, kvm_eq.qshift, true);
+ if (rc) {
+ pr_err("Failed to configure queue %d for VCPU %d: %d\n",
+ priority, xc->server_num, rc);
+ put_page(page);
+ return rc;
+ }
+
+ /*
+ * Only restore the queue state when needed. When doing the
+ * H_INT_SET_SOURCE_CONFIG hcall, it should not.
+ */
+ if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
+ rc = xive_native_set_queue_state(xc->vp_id, priority,
+ kvm_eq.qtoggle,
+ kvm_eq.qindex);
+ if (rc)
+ goto error;
+ }
+
+ rc = kvmppc_xive_attach_escalation(vcpu, priority,
+ xive->single_escalation);
+error:
+ if (rc)
+ kvmppc_xive_native_cleanup_queue(vcpu, priority);
+ return rc;
+}
+
+static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
+ long eq_idx, u64 addr)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_xive_vcpu *xc;
+ struct xive_q *q;
+ void __user *ubufp = (u64 __user *) addr;
+ u32 server;
+ u8 priority;
+ struct kvm_ppc_xive_eq kvm_eq;
+ u64 qaddr;
+ u64 qshift;
+ u64 qeoi_page;
+ u32 escalate_irq;
+ u64 qflags;
+ int rc;
+
+ /*
+ * Demangle priority/server tuple from the EQ identifier
+ */
+ priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+ KVM_XIVE_EQ_PRIORITY_SHIFT;
+ server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+ KVM_XIVE_EQ_SERVER_SHIFT;
+
+ vcpu = kvmppc_xive_find_server(kvm, server);
+ if (!vcpu) {
+ pr_err("Can't find server %d\n", server);
+ return -ENOENT;
+ }
+ xc = vcpu->arch.xive_vcpu;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("invalid priority for queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+ q = &xc->queues[priority];
+
+ memset(&kvm_eq, 0, sizeof(kvm_eq));
+
+ if (!q->qpage)
+ return 0;
+
+ rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
+ &qeoi_page, &escalate_irq, &qflags);
+ if (rc)
+ return rc;
+
+ kvm_eq.flags = 0;
+ if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
+ kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
+
+ kvm_eq.qshift = q->guest_qshift;
+ kvm_eq.qaddr = q->guest_qaddr;
+
+ rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
+ &kvm_eq.qindex);
+ if (rc)
+ return rc;
+
+ pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+ __func__, server, priority, kvm_eq.flags,
+ kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+ if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
+{
+ int i;
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+
+ if (!state->valid)
+ continue;
+
+ if (state->act_priority == MASKED)
+ continue;
+
+ state->eisn = 0;
+ state->act_server = 0;
+ state->act_priority = MASKED;
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+ if (state->pt_number) {
+ xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->pt_number,
+ 0, MASKED, 0);
+ }
+ }
+}
+
+static int kvmppc_xive_reset(struct kvmppc_xive *xive)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
+
+ pr_devel("%s\n", __func__);
+
+ mutex_lock(&kvm->lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int prio;
+
+ if (!xc)
+ continue;
+
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+
+ /* Single escalation, no queue 7 */
+ if (prio == 7 && xive->single_escalation)
+ break;
+
+ if (xc->esc_virq[prio]) {
+ free_irq(xc->esc_virq[prio], vcpu);
+ irq_dispose_mapping(xc->esc_virq[prio]);
+ kfree(xc->esc_virq_names[prio]);
+ xc->esc_virq[prio] = 0;
+ }
+
+ kvmppc_xive_native_cleanup_queue(vcpu, prio);
+ }
+ }
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_reset_sources(sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ return 0;
+}
+
+static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
+{
+ int j;
+
+ for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
+ struct xive_irq_data *xd;
+ u32 hw_num;
+
+ if (!state->valid)
+ continue;
+
+ /*
+ * The struct kvmppc_xive_irq_state reflects the state
+ * of the EAS configuration and not the state of the
+ * source. The source is masked setting the PQ bits to
+ * '-Q', which is what is being done before calling
+ * the KVM_DEV_XIVE_EQ_SYNC control.
+ *
+ * If a source EAS is configured, OPAL syncs the XIVE
+ * IC of the source and the XIVE IC of the previous
+ * target if any.
+ *
+ * So it should be fine ignoring MASKED sources as
+ * they have been synced already.
+ */
+ if (state->act_priority == MASKED)
+ continue;
+
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+ xive_native_sync_source(hw_num);
+ xive_native_sync_queue(hw_num);
+ }
+}
+
+static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int prio;
+
+ if (!xc)
+ return -ENOENT;
+
+ for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ struct xive_q *q = &xc->queues[prio];
+
+ if (!q->qpage)
+ continue;
+
+ /* Mark EQ page dirty for migration */
+ mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
+ }
+ return 0;
+}
+
+static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
+
+ pr_devel("%s\n", __func__);
+
+ mutex_lock(&kvm->lock);
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_native_sync_sources(sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvmppc_xive_native_vcpu_eq_sync(vcpu);
+ }
+ mutex_unlock(&kvm->lock);
+
+ return 0;
+}
+
+static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvmppc_xive *xive = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XIVE_RESET:
+ return kvmppc_xive_reset(xive);
+ case KVM_DEV_XIVE_EQ_SYNC:
+ return kvmppc_xive_native_eq_sync(xive);
+ }
+ break;
+ case KVM_DEV_XIVE_GRP_SOURCE:
+ return kvmppc_xive_native_set_source(xive, attr->attr,
+ attr->addr);
+ case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+ return kvmppc_xive_native_set_source_config(xive, attr->attr,
+ attr->addr);
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return kvmppc_xive_native_set_queue_config(xive, attr->attr,
+ attr->addr);
+ case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+ return kvmppc_xive_native_sync_source(xive, attr->attr,
+ attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvmppc_xive *xive = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return kvmppc_xive_native_get_queue_config(xive, attr->attr,
+ attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XIVE_RESET:
+ case KVM_DEV_XIVE_EQ_SYNC:
+ return 0;
+ }
+ break;
+ case KVM_DEV_XIVE_GRP_SOURCE:
+ case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+ case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+ if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
+ attr->attr < KVMPPC_XIVE_NR_IRQS)
+ return 0;
+ break;
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return 0;
+ }
+ return -ENXIO;
+}
+
+/*
+ * Called when device fd is closed
+ */
+static void kvmppc_xive_native_release(struct kvm_device *dev)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
+ int was_ready;
+
+ debugfs_remove(xive->dentry);
+
+ pr_devel("Releasing xive native device\n");
+
+ /*
+ * Clearing mmu_ready temporarily while holding kvm->lock
+ * is a way of ensuring that no vcpus can enter the guest
+ * until we drop kvm->lock. Doing kick_all_cpus_sync()
+ * ensures that any vcpu executing inside the guest has
+ * exited the guest. Once kick_all_cpus_sync() has finished,
+ * we know that no vcpu can be executing the XIVE push or
+ * pull code or accessing the XIVE MMIO regions.
+ *
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd or mmap referring to
+ * the device. Therefore there can not be any of the
+ * device attribute set/get, mmap, or page fault functions
+ * being executed concurrently, and similarly, the
+ * connect_vcpu and set/clr_mapped functions also cannot
+ * be being executed.
+ */
+ was_ready = kvm->arch.mmu_ready;
+ kvm->arch.mmu_ready = 0;
+ kick_all_cpus_sync();
+
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+ kvm->arch.xive = NULL;
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ if (xive->src_blocks[i])
+ kvmppc_xive_free_sources(xive->src_blocks[i]);
+ kfree(xive->src_blocks[i]);
+ xive->src_blocks[i] = NULL;
+ }
+
+ if (xive->vp_base != XIVE_INVALID_VP)
+ xive_native_free_vp_block(xive->vp_base);
+
+ kvm->arch.mmu_ready = was_ready;
+
+ /*
+ * A reference of the kvmppc_xive pointer is now kept under
+ * the xive_devices struct of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
+
+ kfree(dev);
+}
+
+/*
+ * Create a XIVE device. kvm->lock is held.
+ */
+static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
+{
+ struct kvmppc_xive *xive;
+ struct kvm *kvm = dev->kvm;
+ int ret = 0;
+
+ pr_devel("Creating xive native device\n");
+
+ if (kvm->arch.xive)
+ return -EEXIST;
+
+ xive = kvmppc_xive_get_device(kvm, type);
+ if (!xive)
+ return -ENOMEM;
+
+ dev->private = xive;
+ xive->dev = dev;
+ xive->kvm = kvm;
+ kvm->arch.xive = xive;
+ mutex_init(&xive->mapping_lock);
+
+ /*
+ * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
+ * a default. Getting the max number of CPUs the VM was
+ * configured with would improve our usage of the XIVE VP space.
+ */
+ xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
+ pr_devel("VP_Base=%x\n", xive->vp_base);
+
+ if (xive->vp_base == XIVE_INVALID_VP)
+ ret = -ENXIO;
+
+ xive->single_escalation = xive_native_has_single_escalation();
+ xive->ops = &kvmppc_xive_native_ops;
+
+ if (ret)
+ kfree(xive);
+
+ return ret;
+}
+
+/*
+ * Interrupt Pending Buffer (IPB) offset
+ */
+#define TM_IPB_SHIFT 40
+#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)
+
+int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u64 opal_state;
+ int rc;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return -EPERM;
+
+ if (!xc)
+ return -ENOENT;
+
+ /* Thread context registers. We only care about IPB and CPPR */
+ val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
+
+ /* Get the VP state from OPAL */
+ rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
+ if (rc)
+ return rc;
+
+ /*
+ * Capture the backup of IPB register in the NVT structure and
+ * merge it in our KVM VP state.
+ */
+ val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
+
+ pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+ __func__,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ vcpu->arch.xive_saved_state.w01,
+ (u32) vcpu->arch.xive_cam_word, opal_state);
+
+ return 0;
+}
+
+int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+
+ pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
+ val->xive_timaval[0], val->xive_timaval[1]);
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return -EPERM;
+
+ if (!xc || !xive)
+ return -ENOENT;
+
+ /* We can't update the state of a "pushed" VCPU */
+ if (WARN_ON(vcpu->arch.xive_pushed))
+ return -EBUSY;
+
+ /*
+ * Restore the thread context registers. IPB and CPPR should
+ * be the only ones that matter.
+ */
+ vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
+
+ /*
+ * There is no need to restore the XIVE internal state (IPB
+ * stored in the NVT) as the IPB register was merged in KVM VP
+ * state when captured.
+ */
+ return 0;
+}
+
+static int xive_native_debug_show(struct seq_file *m, void *private)
+{
+ struct kvmppc_xive *xive = m->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
+
+ if (!kvm)
+ return 0;
+
+ seq_puts(m, "=========\nVCPU state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ if (!xc)
+ continue;
+
+ seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ vcpu->arch.xive_saved_state.w01,
+ (u32) vcpu->arch.xive_cam_word);
+
+ kvmppc_xive_debug_show_queues(m, vcpu);
+ }
+
+ return 0;
+}
+
+static int xive_native_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, xive_native_debug_show, inode->i_private);
+}
+
+static const struct file_operations xive_native_debug_fops = {
+ .open = xive_native_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void xive_native_debugfs_init(struct kvmppc_xive *xive)
+{
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
+ if (!name) {
+ pr_err("%s: no memory for name\n", __func__);
+ return;
+ }
+
+ xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+ xive, &xive_native_debug_fops);
+
+ pr_debug("%s: created %s\n", __func__, name);
+ kfree(name);
+}
+
+static void kvmppc_xive_native_init(struct kvm_device *dev)
+{
+ struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
+
+ /* Register some debug interfaces */
+ xive_native_debugfs_init(xive);
+}
+
+struct kvm_device_ops kvm_xive_native_ops = {
+ .name = "kvm-xive-native",
+ .create = kvmppc_xive_native_create,
+ .init = kvmppc_xive_native_init,
+ .release = kvmppc_xive_native_release,
+ .set_attr = kvmppc_xive_native_set_attr,
+ .get_attr = kvmppc_xive_native_get_attr,
+ .has_attr = kvmppc_xive_native_has_attr,
+ .mmap = kvmppc_xive_native_mmap,
+};
+
+void kvmppc_xive_native_init_module(void)
+{
+ ;
+}
+
+void kvmppc_xive_native_exit_module(void)
+{
+ ;
+}
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 033363d6e764..0737acfd17f1 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -130,24 +130,14 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
*/
prio = ffs(pending) - 1;
- /*
- * If the most favoured prio we found pending is less
- * favored (or equal) than a pending IPI, we return
- * the IPI instead.
- *
- * Note: If pending was 0 and mfrr is 0xff, we will
- * not spurriously take an IPI because mfrr cannot
- * then be smaller than cppr.
- */
- if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
- prio = xc->mfrr;
- hirq = XICS_IPI;
- break;
- }
-
/* Don't scan past the guest cppr */
- if (prio >= xc->cppr || prio > 7)
+ if (prio >= xc->cppr || prio > 7) {
+ if (xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ }
break;
+ }
/* Grab queue and pointers */
q = &xc->queues[prio];
@@ -184,9 +174,12 @@ skip_ipi:
* been set and another occurrence of the IPI will trigger.
*/
if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
- if (scan_type == scan_fetch)
+ if (scan_type == scan_fetch) {
GLUE(X_PFX,source_eoi)(xc->vp_ipi,
&xc->vp_ipi_data);
+ q->idx = idx;
+ q->toggle = toggle;
+ }
/* Loop back on same queue with updated idx/toggle */
#ifdef XIVE_RUNTIME_CHECKS
WARN_ON(hirq && hirq != XICS_IPI);
@@ -199,32 +192,41 @@ skip_ipi:
if (hirq == XICS_DUMMY)
goto skip_ipi;
- /* If fetching, update queue pointers */
- if (scan_type == scan_fetch) {
- q->idx = idx;
- q->toggle = toggle;
- }
-
- /* Something found, stop searching */
- if (hirq)
- break;
-
- /* Clear the pending bit on the now empty queue */
- pending &= ~(1 << prio);
+ /* Clear the pending bit if the queue is now empty */
+ if (!hirq) {
+ pending &= ~(1 << prio);
- /*
- * Check if the queue count needs adjusting due to
- * interrupts being moved away.
- */
- if (atomic_read(&q->pending_count)) {
- int p = atomic_xchg(&q->pending_count, 0);
- if (p) {
+ /*
+ * Check if the queue count needs adjusting due to
+ * interrupts being moved away.
+ */
+ if (atomic_read(&q->pending_count)) {
+ int p = atomic_xchg(&q->pending_count, 0);
+ if (p) {
#ifdef XIVE_RUNTIME_CHECKS
- WARN_ON(p > atomic_read(&q->count));
+ WARN_ON(p > atomic_read(&q->count));
#endif
- atomic_sub(p, &q->count);
+ atomic_sub(p, &q->count);
+ }
}
}
+
+ /*
+ * If the most favoured prio we found pending is less
+ * favored (or equal) than a pending IPI, we return
+ * the IPI instead.
+ */
+ if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ break;
+ }
+
+ /* If fetching, update queue pointers */
+ if (scan_type == scan_fetch) {
+ q->idx = idx;
+ q->toggle = toggle;
+ }
}
/* If we are just taking a "peek", do nothing else */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8885377ec3e0..3393b166817a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -570,6 +570,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE:
+ /*
+ * We need XIVE to be enabled on the platform (implies
+ * a POWER9 processor) and the PowerNV platform, as
+ * nested is not yet supported.
+ */
+ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
+ break;
+#endif
case KVM_CAP_PPC_ALLOC_HTAB:
r = hv_enabled;
@@ -644,9 +654,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
else
r = num_online_cpus();
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
@@ -753,6 +760,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
else
kvmppc_xics_free_icp(vcpu);
break;
+ case KVMPPC_IRQ_XIVE:
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+ break;
}
kvmppc_core_vcpu_free(vcpu);
@@ -1941,6 +1951,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE: {
+ struct fd f;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ f = fdget(cap->args[0]);
+ if (!f.file)
+ break;
+
+ r = -ENXIO;
+ if (!xive_enabled())
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(f.file);
+ if (dev)
+ r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+ cap->args[1]);
+
+ fdput(f);
+ break;
+ }
+#endif /* CONFIG_KVM_XIVE */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = -EINVAL;
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 844d8e774492..b7f6f6e0b6e8 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -215,11 +215,20 @@ _GLOBAL_TOC(memcmp)
beq .Lzero
.Lcmp_rest_lt8bytes:
- /* Here we have only less than 8 bytes to compare with. at least s1
- * Address is aligned with 8 bytes.
- * The next double words are load and shift right with appropriate
- * bits.
+ /*
+ * Here we have less than 8 bytes to compare. At least s1 is aligned to
+ * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
+ * page boundary, otherwise we might read past the end of the buffer and
+ * trigger a page fault. We use 4K as the conservative minimum page
+ * size. If we detect that case we go to the byte-by-byte loop.
+ *
+ * Otherwise the next double word is loaded from s1 and s2, and shifted
+ * right to compare the appropriate bits.
*/
+ clrldi r6,r4,(64-12) // r6 = r4 & 0xfff
+ cmpdi r6,0xff8
+ bgt .Lshort
+
subfic r6,r5,8
slwi r6,r6,3
LD rA,0,r3
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 549e9490ff2a..dcac37745b05 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,6 +51,8 @@
#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i)
#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \
___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \
+ ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \
___PPC_RA(base) | ((i) & 0xfffc))
#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \
@@ -65,7 +67,9 @@
#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
+ ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \
+ ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \
@@ -85,17 +89,6 @@
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \
___PPC_RA(a) | ___PPC_RB(b))
-
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
-#else
-#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
-#endif
-
#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index dc50a8d4b3b9..21744d8aa053 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -122,6 +122,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
#endif
+#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
+#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
+
#define SEEN_DATAREF 0x10000 /* might call external helpers */
#define SEEN_XREG 0x20000 /* X reg is used */
#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 3609be4692b3..47f441f351a6 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -68,6 +68,26 @@ static const int b2p[] = {
/* PPC NVR range -- update this if we ever use NVRs below r27 */
#define BPF_PPC_NVR_MIN 27
+/*
+ * WARNING: These can use TMP_REG_2 if the offset is not at word boundary,
+ * so ensure that it isn't in use already.
+ */
+#define PPC_BPF_LL(r, base, i) do { \
+ if ((i) % 4) { \
+ PPC_LI(b2p[TMP_REG_2], (i)); \
+ PPC_LDX(r, base, b2p[TMP_REG_2]); \
+ } else \
+ PPC_LD(r, base, i); \
+ } while(0)
+#define PPC_BPF_STL(r, base, i) do { \
+ if ((i) % 4) { \
+ PPC_LI(b2p[TMP_REG_2], (i)); \
+ PPC_STDX(r, base, b2p[TMP_REG_2]); \
+ } else \
+ PPC_STD(r, base, i); \
+ } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
+
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
#define SEEN_TAILCALL 0x4000 /* uses tail calls */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 4194d3cfb60c..21a1dcd4b156 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -252,7 +252,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
PPC_BCC(COND_GT, out);
@@ -265,7 +265,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* prog = array->ptrs[index]; */
PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
- PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
/*
* if (prog == NULL)
@@ -275,7 +275,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
PPC_BCC(COND_EQ, out);
/* goto *(prog->bpf_func + prologue_size); */
- PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
/* skip past the function descriptor */
PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
@@ -606,7 +606,7 @@ bpf_alu32_trunc:
* the instructions generated will remain the
* same across all passes
*/
- PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
+ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
break;
@@ -662,7 +662,7 @@ emit_clear:
PPC_LI32(b2p[TMP_REG_1], imm);
src_reg = b2p[TMP_REG_1];
}
- PPC_STD(src_reg, dst_reg, off);
+ PPC_BPF_STL(src_reg, dst_reg, off);
break;
/*
@@ -709,7 +709,7 @@ emit_clear:
break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
- PPC_LD(dst_reg, src_reg, off);
+ PPC_BPF_LL(dst_reg, src_reg, off);
break;
/*
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index daad8c45c8e7..7472244e7f30 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -260,6 +260,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
+OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
+OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 6ed22127391b..921f12182f3e 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -77,18 +77,27 @@ static u32 cpu_to_drc_index(int cpu)
ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
} else {
- const __be32 *indexes;
-
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
+ u32 nr_drc_indexes, thread_drc_index;
/*
- * The first element indexes[0] is the number of drc_indexes
- * returned in the list. Hence thread_index+1 will get the
- * drc_index corresponding to core number thread_index.
+ * The first element of ibm,drc-indexes array is the
+ * number of drc_indexes returned in the list. Hence
+ * thread_index+1 will get the drc_index corresponding
+ * to core number thread_index.
*/
- ret = indexes[thread_index + 1];
+ rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+ 0, &nr_drc_indexes);
+ if (rc)
+ goto err_of_node_put;
+
+ WARN_ON_ONCE(thread_index > nr_drc_indexes);
+ rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+ thread_index + 1,
+ &thread_drc_index);
+ if (rc)
+ goto err_of_node_put;
+
+ ret = thread_drc_index;
}
rc = 0;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index d97d52772789..452dcfd7e5dd 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -550,6 +550,7 @@ static void pseries_print_mce_info(struct pt_regs *regs,
"UE",
"SLB",
"ERAT",
+ "Unknown",
"TLB",
"D-Cache",
"Unknown",
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1ca127d052a6..7782201e5fe8 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -437,6 +437,12 @@ void xive_native_sync_source(u32 hw_irq)
}
EXPORT_SYMBOL_GPL(xive_native_sync_source);
+void xive_native_sync_queue(u32 hw_irq)
+{
+ opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_queue);
+
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
.configure_irq = xive_native_configure_irq,
@@ -515,6 +521,9 @@ u32 xive_native_default_eq_shift(void)
}
EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
+unsigned long xive_tima_os;
+EXPORT_SYMBOL_GPL(xive_tima_os);
+
bool __init xive_native_init(void)
{
struct device_node *np;
@@ -567,6 +576,14 @@ bool __init xive_native_init(void)
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
+ /* Resource 2 is OS window */
+ if (of_address_to_resource(np, 2, &r)) {
+ pr_err("Failed to get thread mgmnt area resource\n");
+ return false;
+ }
+
+ xive_tima_os = r.start;
+
/* Grab size of provisionning pages */
xive_parse_provisioning(np);
@@ -711,3 +728,96 @@ bool xive_native_has_single_escalation(void)
return xive_has_single_esc;
}
EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
+
+int xive_native_get_queue_info(u32 vp_id, u32 prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags)
+{
+ __be64 qpage;
+ __be64 qsize;
+ __be64 qeoi_page;
+ __be32 escalate_irq;
+ __be64 qflags;
+ s64 rc;
+
+ rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
+ &qeoi_page, &escalate_irq, &qflags);
+ if (rc) {
+ pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ if (out_qpage)
+ *out_qpage = be64_to_cpu(qpage);
+ if (out_qsize)
+ *out_qsize = be32_to_cpu(qsize);
+ if (out_qeoi_page)
+ *out_qeoi_page = be64_to_cpu(qeoi_page);
+ if (out_escalate_irq)
+ *out_escalate_irq = be32_to_cpu(escalate_irq);
+ if (out_qflags)
+ *out_qflags = be64_to_cpu(qflags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
+
+int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
+{
+ __be32 opal_qtoggle;
+ __be32 opal_qindex;
+ s64 rc;
+
+ rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
+ &opal_qindex);
+ if (rc) {
+ pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ if (qtoggle)
+ *qtoggle = be32_to_cpu(opal_qtoggle);
+ if (qindex)
+ *qindex = be32_to_cpu(opal_qindex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
+
+int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
+{
+ s64 rc;
+
+ rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
+ if (rc) {
+ pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
+{
+ __be64 state;
+ s64 rc;
+
+ rc = opal_xive_get_vp_state(vp_id, &state);
+ if (rc) {
+ pr_err("OPAL failed to get vp state for VCPU %d : %lld\n",
+ vp_id, rc);
+ return -EIO;
+ }
+
+ if (out_state)
+ *out_state = be64_to_cpu(state);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 57afe604b495..c207f6634b91 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -26,7 +26,7 @@ enum fixed_addresses {
};
#define FIXADDR_SIZE (__end_of_fixed_addresses * PAGE_SIZE)
-#define FIXADDR_TOP (PAGE_OFFSET)
+#define FIXADDR_TOP (VMALLOC_START)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
#define FIXMAP_PAGE_IO PAGE_KERNEL
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index bba3da6ef157..a3d5273ded7c 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -72,32 +72,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- args[0] = regs->orig_a0;
- args++;
- i++;
- n--;
- }
- memcpy(args, &regs->a1 + i * sizeof(regs->a1), n * sizeof(args[0]));
+ args[0] = regs->orig_a0;
+ args++;
+ memcpy(args, &regs->a1, 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- if (i == 0) {
- regs->orig_a0 = args[0];
- args++;
- i++;
- n--;
- }
- memcpy(&regs->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0));
+ regs->orig_a0 = args[0];
+ args++;
+ memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
static inline int syscall_get_arch(void)
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index a00168b980d2..fb53a8089e76 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -300,7 +300,7 @@ do { \
" .balign 4\n" \
"4:\n" \
" li %0, %6\n" \
- " jump 2b, %1\n" \
+ " jump 3b, %1\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
" .balign " RISCV_SZPTR "\n" \
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f13f7f276639..598568168d35 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -4,7 +4,6 @@
ifdef CONFIG_FTRACE
CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_setup.o = -pg
endif
extra-y += head.o
@@ -29,8 +28,6 @@ obj-y += vdso.o
obj-y += cacheinfo.o
obj-y += vdso/
-CFLAGS_setup.o := -mcmodel=medany
-
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 7dd308129b40..2872edce894d 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -141,7 +141,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
{
s32 hi20;
- if (IS_ENABLED(CMODEL_MEDLOW)) {
+ if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index ecb654f6a79e..540a331d1376 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -48,14 +48,6 @@ struct screen_info screen_info = {
};
#endif
-unsigned long va_pa_offset;
-EXPORT_SYMBOL(va_pa_offset);
-unsigned long pfn_base;
-EXPORT_SYMBOL(pfn_base);
-
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
-EXPORT_SYMBOL(empty_zero_page);
-
/* The lucky hart to first increment this variable will boot the other cores */
atomic_t hart_lottery;
unsigned long boot_cpu_hartid;
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index eb22ab49b3e0..b68aac701803 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -1,3 +1,9 @@
+
+CFLAGS_init.o := -mcmodel=medany
+ifdef CONFIG_FTRACE
+CFLAGS_REMOVE_init.o = -pg
+endif
+
obj-y += init.o
obj-y += fault.o
obj-y += extable.o
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b379a75ac6a6..5fd8c922e1c2 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -25,6 +25,10 @@
#include <asm/pgtable.h>
#include <asm/io.h>
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
+ __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
@@ -143,6 +147,11 @@ void __init setup_bootmem(void)
}
}
+unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
@@ -172,6 +181,25 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
}
}
+/*
+ * setup_vm() is called from head.S with MMU-off.
+ *
+ * Following requirements should be honoured for setup_vm() to work
+ * correctly:
+ * 1) It should use PC-relative addressing for accessing kernel symbols.
+ * To achieve this we always use GCC cmodel=medany.
+ * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
+ * so disable compiler instrumentation when FTRACE is enabled.
+ *
+ * Currently, the above requirements are honoured by using custom CFLAGS
+ * for init.o in mm/Makefile.
+ */
+
+#ifndef __riscv_cmodel_medany
+#error "setup_vm() is called from head.S before relocate so it should "
+ "not use absolute addressing."
+#endif
+
asmlinkage void __init setup_vm(void)
{
extern char _start;
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 1a6a7092d942..e94a0a28b5eb 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -360,4 +360,15 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
return reg1;
}
+/*
+ * Interface to tell the AP bus code that a configuration
+ * change has happened. The bus code should at least do
+ * an ap bus resource rescan.
+ */
+#if IS_ENABLED(CONFIG_ZCRYPT)
+void ap_bus_cfg_chg(void);
+#else
+static inline void ap_bus_cfg_chg(void){};
+#endif
+
#endif /* _ASM_S390_AP_H_ */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 3cc52e37b4b2..ce2770743cc5 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -28,6 +28,7 @@
#define CPACF_KMCTR 0xb92d /* MSA4 */
#define CPACF_PRNO 0xb93c /* MSA5 */
#define CPACF_KMA 0xb929 /* MSA8 */
+#define CPACF_KDSA 0xb93a /* MSA9 */
/*
* En/decryption modifier bits
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 7d22a474a040..f74639a05f0f 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -252,11 +252,14 @@ do { \
/*
* Cache aliasing on the latest machines calls for a mapping granularity
- * of 512KB. For 64-bit processes use a 512KB alignment and a randomization
- * of up to 1GB. For 31-bit processes the virtual address space is limited,
- * use no alignment and limit the randomization to 8MB.
+ * of 512KB for the anonymous mapping base. For 64-bit processes use a
+ * 512KB alignment and a randomization of up to 1GB. For 31-bit processes
+ * the virtual address space is limited, use no alignment and limit the
+ * randomization to 8MB.
+ * For the additional randomization of the program break use 32MB for
+ * 64-bit and 8MB for 31-bit.
*/
-#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL)
+#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL)
#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL)
#define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL)
#define STACK_RND_MASK MMAP_RND_MASK
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c47e22bba87f..bdbc81b5bc91 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -278,6 +278,7 @@ struct kvm_s390_sie_block {
#define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000
#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -312,6 +313,7 @@ struct kvm_vcpu_stat {
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
+ u64 halt_no_poll_steal;
u64 halt_wakeup;
u64 instruction_lctl;
u64 instruction_lctlg;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index cc0947e08b6f..5b9f10b1e55d 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -91,52 +91,53 @@ struct lowcore {
__u64 hardirq_timer; /* 0x02e8 */
__u64 softirq_timer; /* 0x02f0 */
__u64 steal_timer; /* 0x02f8 */
- __u64 last_update_timer; /* 0x0300 */
- __u64 last_update_clock; /* 0x0308 */
- __u64 int_clock; /* 0x0310 */
- __u64 mcck_clock; /* 0x0318 */
- __u64 clock_comparator; /* 0x0320 */
- __u64 boot_clock[2]; /* 0x0328 */
+ __u64 avg_steal_timer; /* 0x0300 */
+ __u64 last_update_timer; /* 0x0308 */
+ __u64 last_update_clock; /* 0x0310 */
+ __u64 int_clock; /* 0x0318*/
+ __u64 mcck_clock; /* 0x0320 */
+ __u64 clock_comparator; /* 0x0328 */
+ __u64 boot_clock[2]; /* 0x0330 */
/* Current process. */
- __u64 current_task; /* 0x0338 */
- __u64 kernel_stack; /* 0x0340 */
+ __u64 current_task; /* 0x0340 */
+ __u64 kernel_stack; /* 0x0348 */
/* Interrupt, DAT-off and restartstack. */
- __u64 async_stack; /* 0x0348 */
- __u64 nodat_stack; /* 0x0350 */
- __u64 restart_stack; /* 0x0358 */
+ __u64 async_stack; /* 0x0350 */
+ __u64 nodat_stack; /* 0x0358 */
+ __u64 restart_stack; /* 0x0360 */
/* Restart function and parameter. */
- __u64 restart_fn; /* 0x0360 */
- __u64 restart_data; /* 0x0368 */
- __u64 restart_source; /* 0x0370 */
+ __u64 restart_fn; /* 0x0368 */
+ __u64 restart_data; /* 0x0370 */
+ __u64 restart_source; /* 0x0378 */
/* Address space pointer. */
- __u64 kernel_asce; /* 0x0378 */
- __u64 user_asce; /* 0x0380 */
- __u64 vdso_asce; /* 0x0388 */
+ __u64 kernel_asce; /* 0x0380 */
+ __u64 user_asce; /* 0x0388 */
+ __u64 vdso_asce; /* 0x0390 */
/*
* The lpp and current_pid fields form a
* 64-bit value that is set as program
* parameter with the LPP instruction.
*/
- __u32 lpp; /* 0x0390 */
- __u32 current_pid; /* 0x0394 */
+ __u32 lpp; /* 0x0398 */
+ __u32 current_pid; /* 0x039c */
/* SMP info area */
- __u32 cpu_nr; /* 0x0398 */
- __u32 softirq_pending; /* 0x039c */
- __u32 preempt_count; /* 0x03a0 */
- __u32 spinlock_lockval; /* 0x03a4 */
- __u32 spinlock_index; /* 0x03a8 */
- __u32 fpu_flags; /* 0x03ac */
- __u64 percpu_offset; /* 0x03b0 */
- __u64 vdso_per_cpu_data; /* 0x03b8 */
- __u64 machine_flags; /* 0x03c0 */
- __u64 gmap; /* 0x03c8 */
- __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */
+ __u32 cpu_nr; /* 0x03a0 */
+ __u32 softirq_pending; /* 0x03a4 */
+ __u32 preempt_count; /* 0x03a8 */
+ __u32 spinlock_lockval; /* 0x03ac */
+ __u32 spinlock_index; /* 0x03b0 */
+ __u32 fpu_flags; /* 0x03b4 */
+ __u64 percpu_offset; /* 0x03b8 */
+ __u64 vdso_per_cpu_data; /* 0x03c0 */
+ __u64 machine_flags; /* 0x03c8 */
+ __u64 gmap; /* 0x03d0 */
+ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */
/* br %r1 trampoline */
__u16 br_r1_trampoline; /* 0x0400 */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 96f9a9151fde..59c3e91f2cdb 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -56,40 +56,32 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
unsigned long mask = -1UL;
+ unsigned int n = 6;
- /*
- * No arguments for this syscall, there's nothing to do.
- */
- if (!n)
- return;
-
- BUG_ON(i + n > 6);
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_31BIT))
mask = 0xffffffff;
#endif
while (n-- > 0)
- if (i + n > 0)
- args[n] = regs->gprs[2 + i + n] & mask;
- if (i == 0)
- args[0] = regs->orig_gpr2 & mask;
+ if (n > 0)
+ args[n] = regs->gprs[2 + n] & mask;
+
+ args[0] = regs->orig_gpr2 & mask;
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
+ unsigned int n = 6;
+
while (n-- > 0)
- if (i + n > 0)
- regs->gprs[2 + i + n] = args[n];
- if (i == 0)
- regs->orig_gpr2 = args[0];
+ if (n > 0)
+ regs->gprs[2 + n] = args[n];
+ regs->orig_gpr2 = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 16511d97e8dc..47104e5b47fd 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -152,7 +152,10 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 pcc[16]; /* with MSA4 */
__u8 ppno[16]; /* with MSA5 */
__u8 kma[16]; /* with MSA8 */
- __u8 reserved[1808];
+ __u8 kdsa[16]; /* with MSA9 */
+ __u8 sortl[32]; /* with STFLE.150 */
+ __u8 dfltcc[32]; /* with STFLE.151 */
+ __u8 reserved[1728];
};
/* kvm attributes for crypto */
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index c6fad208c2fa..b6854812d2ed 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -196,23 +196,30 @@ static void cf_diag_perf_event_destroy(struct perf_event *event)
*/
static int __hw_perf_event_init(struct perf_event *event)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
struct perf_event_attr *attr = &event->attr;
+ struct cpu_cf_events *cpuhw;
enum cpumf_ctr_set i;
int err = 0;
- debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d authorized %#x\n", __func__,
- event, event->cpu, cpuhw->info.auth_ctl);
+ debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__,
+ event, event->cpu);
event->hw.config = attr->config;
event->hw.config_base = 0;
- local64_set(&event->count, 0);
- /* Add all authorized counter sets to config_base */
+ /* Add all authorized counter sets to config_base. The
+ * the hardware init function is either called per-cpu or just once
+ * for all CPUS (event->cpu == -1). This depends on the whether
+ * counting is started for all CPUs or on a per workload base where
+ * the perf event moves from one CPU to another CPU.
+ * Checking the authorization on any CPU is fine as the hardware
+ * applies the same authorization settings to all CPUs.
+ */
+ cpuhw = &get_cpu_var(cpu_cf_events);
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
event->hw.config_base |= cpumf_ctr_ctl[i];
+ put_cpu_var(cpu_cf_events);
/* No authorized counter sets, nothing to count/sample */
if (!event->hw.config_base) {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3fe1c77c361b..bd197baf1dc3 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -266,7 +266,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
- lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+ lc->user_timer = lc->system_timer =
+ lc->steal_timer = lc->avg_steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 98f850e00008..a69a0911ed0e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -124,7 +124,7 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
*/
static int do_account_vtime(struct task_struct *tsk)
{
- u64 timer, clock, user, guest, system, hardirq, softirq, steal;
+ u64 timer, clock, user, guest, system, hardirq, softirq;
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
@@ -182,12 +182,6 @@ static int do_account_vtime(struct task_struct *tsk)
if (softirq)
account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
- steal = S390_lowcore.steal_timer;
- if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
- account_steal_time(cputime_to_nsecs(steal));
- }
-
return virt_timer_forward(user + guest + system + hardirq + softirq);
}
@@ -213,8 +207,19 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_flush(struct task_struct *tsk)
{
+ u64 steal, avg_steal;
+
if (do_account_vtime(tsk))
virt_timer_expire();
+
+ steal = S390_lowcore.steal_timer;
+ avg_steal = S390_lowcore.avg_steal_timer / 2;
+ if ((s64) steal > 0) {
+ S390_lowcore.steal_timer = 0;
+ account_steal_time(steal);
+ avg_steal += steal;
+ }
+ S390_lowcore.avg_steal_timer = avg_steal;
}
/*
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 767453faacfc..e987e73738b5 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -31,6 +31,7 @@ config KVM
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_INVALID_WAKEUPS
+ select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
---help---
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 82162867f378..bfd55ad34a3e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -14,6 +14,7 @@
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
#include <linux/mmu_context.h>
+#include <linux/nospec.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
@@ -2307,6 +2308,7 @@ static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
{
if (id >= MAX_S390_IO_ADAPTERS)
return NULL;
+ id = array_index_nospec(id, MAX_S390_IO_ADAPTERS);
return kvm->arch.adapters[id];
}
@@ -2320,8 +2322,13 @@ static int register_io_adapter(struct kvm_device *dev,
(void __user *)attr->addr, sizeof(adapter_info)))
return -EFAULT;
- if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
- (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+ if (adapter_info.id >= MAX_S390_IO_ADAPTERS)
+ return -EINVAL;
+
+ adapter_info.id = array_index_nospec(adapter_info.id,
+ MAX_S390_IO_ADAPTERS);
+
+ if (dev->kvm->arch.adapters[adapter_info.id] != NULL)
return -EINVAL;
adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4638303ba6a8..8d6d75db8de6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -75,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
+ { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -177,6 +178,11 @@ static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
+/* maximum percentage of steal time for polling. >100 is treated like 100 */
+static u8 halt_poll_max_steal = 10;
+module_param(halt_poll_max_steal, byte, 0644);
+MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling");
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -321,6 +327,22 @@ static inline int plo_test_bit(unsigned char nr)
return cc == 0;
}
+static inline void __insn32_query(unsigned int opcode, u8 query[32])
+{
+ register unsigned long r0 asm("0") = 0; /* query function */
+ register unsigned long r1 asm("1") = (unsigned long) query;
+
+ asm volatile(
+ /* Parameter regs are ignored */
+ " .insn rrf,%[opc] << 16,2,4,6,0\n"
+ : "=m" (*query)
+ : "d" (r0), "a" (r1), [opc] "i" (opcode)
+ : "cc");
+}
+
+#define INSN_SORTL 0xb938
+#define INSN_DFLTCC 0xb939
+
static void kvm_s390_cpu_feat_init(void)
{
int i;
@@ -368,6 +390,16 @@ static void kvm_s390_cpu_feat_init(void)
__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
kvm_s390_available_subfunc.kma);
+ if (test_facility(155)) /* MSA9 */
+ __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kdsa);
+
+ if (test_facility(150)) /* SORTL */
+ __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+
+ if (test_facility(151)) /* DFLTCC */
+ __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
@@ -513,9 +545,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
else if (sclp.has_esca && sclp.has_64bscao)
r = KVM_S390_ESCA_CPU_SLOTS;
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
break;
@@ -657,6 +686,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
set_kvm_facility(kvm->arch.model.fac_mask, 135);
set_kvm_facility(kvm->arch.model.fac_list, 135);
}
+ if (test_facility(148)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 148);
+ set_kvm_facility(kvm->arch.model.fac_list, 148);
+ }
+ if (test_facility(152)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 152);
+ set_kvm_facility(kvm->arch.model.fac_list, 152);
+ }
r = 0;
} else
r = -EINVAL;
@@ -1323,6 +1360,19 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+ VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
+ VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
+ VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
return 0;
}
@@ -1491,6 +1541,19 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+ VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
+ VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
+ VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
return 0;
}
@@ -1546,6 +1609,19 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
+ VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
+ VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
+ VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
return 0;
}
@@ -2817,6 +2893,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
+static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
+{
+ if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
+ test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
+ return true;
+ return false;
+}
+
+static bool kvm_has_pckmo_ecc(struct kvm *kvm)
+{
+ /* At least one ECC subfunction must be present */
+ return kvm_has_pckmo_subfunc(kvm, 32) ||
+ kvm_has_pckmo_subfunc(kvm, 33) ||
+ kvm_has_pckmo_subfunc(kvm, 34) ||
+ kvm_has_pckmo_subfunc(kvm, 40) ||
+ kvm_has_pckmo_subfunc(kvm, 41);
+
+}
+
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
/*
@@ -2829,13 +2924,19 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
vcpu->arch.sie_block->eca &= ~ECA_APIE;
+ vcpu->arch.sie_block->ecd &= ~ECD_ECC;
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
/* Set up protected key support */
- if (vcpu->kvm->arch.crypto.aes_kw)
+ if (vcpu->kvm->arch.crypto.aes_kw) {
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
+ /* ecc is also wrapped with AES key */
+ if (kvm_has_pckmo_ecc(vcpu->kvm))
+ vcpu->arch.sie_block->ecd |= ECD_ECC;
+ }
+
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
@@ -3068,6 +3169,17 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
}
}
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+{
+ /* do not poll with more than halt_poll_max_steal percent of steal time */
+ if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+ halt_poll_max_steal) {
+ vcpu->stat.halt_no_poll_steal++;
+ return true;
+ }
+ return false;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index d62fa148558b..076090f9e666 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -288,7 +288,9 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
+ u32 ecd_flags;
int apie_h;
+ int apie_s;
int key_msk = test_kvm_facility(vcpu->kvm, 76);
int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
@@ -297,7 +299,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->crycbd = 0;
apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
- if (!apie_h && (!key_msk || fmt_o == CRYCB_FORMAT0))
+ apie_s = apie_h & scb_o->eca;
+ if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
return 0;
if (!crycb_addr)
@@ -308,7 +311,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
((crycb_addr + 128) & PAGE_MASK))
return set_validity_icpt(scb_s, 0x003CU);
- if (apie_h && (scb_o->eca & ECA_APIE)) {
+ if (apie_s) {
ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
vcpu->kvm->arch.crypto.crycb,
fmt_o, fmt_h);
@@ -320,7 +323,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
- if (!ecb3_flags)
+ ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
+ if (!ecb3_flags && !ecd_flags)
goto end;
/* copy only the wrapping keys */
@@ -329,6 +333,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return set_validity_icpt(scb_s, 0x0035U);
scb_s->ecb3 |= ecb3_flags;
+ scb_s->ecd |= ecd_flags;
/* xor both blocks in one run */
b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
@@ -339,7 +344,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
end:
switch (ret) {
case -EINVAL:
- return set_validity_icpt(scb_s, 0x0020U);
+ return set_validity_icpt(scb_s, 0x0022U);
case -EFAULT:
return set_validity_icpt(scb_s, 0x0035U);
case -EACCES:
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index fd788e0f2e5b..cead9e0dcffb 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -93,6 +93,9 @@ static struct facility_def facility_defs[] = {
131, /* enhanced-SOP 2 and side-effect */
139, /* multiple epoch facility */
146, /* msa extension 8 */
+ 150, /* enhanced sort */
+ 151, /* deflate conversion */
+ 155, /* msa extension 9 */
-1 /* END */
}
},
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 958f46da3a79..d91065e81a4e 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = {
struct sh_clk_ops;
-void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
+void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx)
{
}
-void __init plat_irq_setup(void)
+void __init __weak plat_irq_setup(void)
{
}
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index a6ef3fee5f85..7bf2cb680d32 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += emergency-restart.h
generic-y += exec.h
generic-y += irq_regs.h
generic-y += irq_work.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 6e118799831c..8c9d7e5e5dcc 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -48,51 +48,28 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- /*
- * Do this simply for now. If we need to start supporting
- * fetching arguments from arbitrary indices, this will need some
- * extra logic. Presently there are no in-tree users that depend
- * on this behaviour.
- */
- BUG_ON(i);
/* Argument pattern is: R4, R5, R6, R7, R0, R1 */
- switch (n) {
- case 6: args[5] = regs->regs[1];
- case 5: args[4] = regs->regs[0];
- case 4: args[3] = regs->regs[7];
- case 3: args[2] = regs->regs[6];
- case 2: args[1] = regs->regs[5];
- case 1: args[0] = regs->regs[4];
- case 0:
- break;
- default:
- BUG();
- }
+ args[5] = regs->regs[1];
+ args[4] = regs->regs[0];
+ args[3] = regs->regs[7];
+ args[2] = regs->regs[6];
+ args[1] = regs->regs[5];
+ args[0] = regs->regs[4];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- /* Same note as above applies */
- BUG_ON(i);
-
- switch (n) {
- case 6: regs->regs[1] = args[5];
- case 5: regs->regs[0] = args[4];
- case 4: regs->regs[7] = args[3];
- case 3: regs->regs[6] = args[2];
- case 2: regs->regs[5] = args[1];
- case 1: regs->regs[4] = args[0];
- break;
- default:
- BUG();
- }
+ regs->regs[1] = args[5];
+ regs->regs[0] = args[4];
+ regs->regs[7] = args[3];
+ regs->regs[6] = args[2];
+ regs->regs[5] = args[1];
+ regs->regs[4] = args[0];
}
static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
index 43882580c7f9..22fad97da066 100644
--- a/arch/sh/include/asm/syscall_64.h
+++ b/arch/sh/include/asm/syscall_64.h
@@ -47,20 +47,16 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->regs[2 + i], n * sizeof(args[0]));
+ memcpy(args, &regs->regs[2], 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->regs[2 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->regs[2], args, 6 * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index ecfbd40924dd..b8812c74c1de 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
generated-y += unistd_32.h
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index b82f64e28f55..a22cfd5c0ee8 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += exec.h
generic-y += export.h
generic-y += irq_regs.h
generic-y += irq_work.h
+generic-y += kvm_para.h
generic-y += linkage.h
generic-y += local.h
generic-y += local64.h
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 053989e3f6a6..4d075434e816 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
int zero_extend = 0;
unsigned int j;
+ unsigned int n = 6;
#ifdef CONFIG_SPARC64
if (test_tsk_thread_flag(task, TIF_32BIT))
@@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
#endif
for (j = 0; j < n; j++) {
- unsigned long val = regs->u_regs[UREG_I0 + i + j];
+ unsigned long val = regs->u_regs[UREG_I0 + j];
if (zero_extend)
args[j] = (u32) val;
@@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- unsigned int j;
+ unsigned int i;
- for (j = 0; j < n; j++)
- regs->u_regs[UREG_I0 + i + j] = args[j];
+ for (i = 0; i < 6; i++)
+ regs->u_regs[UREG_I0 + i] = args[i];
}
static inline int syscall_get_arch(void)
diff --git a/arch/sparc/include/uapi/asm/kvm_para.h b/arch/sparc/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/sparc/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h
index 9fb9cf8cd39a..98e50c50c12e 100644
--- a/arch/um/include/asm/syscall-generic.h
+++ b/arch/um/include/asm/syscall-generic.h
@@ -53,84 +53,30 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
const struct uml_pt_regs *r = &regs->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG1(r);
- case 1:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG2(r);
- case 2:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG3(r);
- case 3:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG4(r);
- case 4:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG5(r);
- case 5:
- if (!n--)
- break;
- *args++ = UPT_SYSCALL_ARG6(r);
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ *args++ = UPT_SYSCALL_ARG1(r);
+ *args++ = UPT_SYSCALL_ARG2(r);
+ *args++ = UPT_SYSCALL_ARG3(r);
+ *args++ = UPT_SYSCALL_ARG4(r);
+ *args++ = UPT_SYSCALL_ARG5(r);
+ *args = UPT_SYSCALL_ARG6(r);
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
struct uml_pt_regs *r = &regs->regs;
- switch (i) {
- case 0:
- if (!n--)
- break;
- UPT_SYSCALL_ARG1(r) = *args++;
- case 1:
- if (!n--)
- break;
- UPT_SYSCALL_ARG2(r) = *args++;
- case 2:
- if (!n--)
- break;
- UPT_SYSCALL_ARG3(r) = *args++;
- case 3:
- if (!n--)
- break;
- UPT_SYSCALL_ARG4(r) = *args++;
- case 4:
- if (!n--)
- break;
- UPT_SYSCALL_ARG5(r) = *args++;
- case 5:
- if (!n--)
- break;
- UPT_SYSCALL_ARG6(r) = *args++;
- case 6:
- if (!n--)
- break;
- default:
- BUG();
- break;
- }
+ UPT_SYSCALL_ARG1(r) = *args++;
+ UPT_SYSCALL_ARG2(r) = *args++;
+ UPT_SYSCALL_ARG3(r) = *args++;
+ UPT_SYSCALL_ARG4(r) = *args++;
+ UPT_SYSCALL_ARG5(r) = *args++;
+ UPT_SYSCALL_ARG6(r) = *args;
}
/* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1d1544b6ca74..d77d953c04c1 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
generic-y += ucontext.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1f9b3cf437c..5ad92419be19 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2217,14 +2217,8 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
config HOTPLUG_CPU
- bool "Support for hot-pluggable CPUs"
+ def_bool y
depends on SMP
- ---help---
- Say Y here to allow turning CPUs off and on. CPUs can be
- controlled through /sys/devices/system/cpu.
- ( Note: power management support will enable this option
- automatically on SMP systems. )
- Say N if you want to disable CPU hotplug.
config BOOTPARAM_HOTPLUG_CPU0
bool "Set default setting of cpu0_hotpluggable"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d8b9d8ca4f8..a587805c6687 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -219,8 +219,12 @@ ifdef CONFIG_RETPOLINE
# Additionally, avoid generating expensive indirect jumps which
# are subject to retpolines for small number of switch cases.
# clang turns off jump table generation by default when under
- # retpoline builds, however, gcc does not for x86.
- KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
+ # retpoline builds, however, gcc does not for x86. This has
+ # only been fixed starting from gcc stable version 8.4.0 and
+ # onwards, but not for older ones. See gcc bug #86952.
+ ifndef CONFIG_CC_IS_CLANG
+ KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
+ endif
endif
archscripts: scripts_basic
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index fd13655e0f9b..d2f184165934 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -120,8 +120,6 @@ static inline void console_init(void)
void set_sev_encryption_mask(void);
-#endif
-
/* acpi.c */
#ifdef CONFIG_ACPI
acpi_physical_address get_rsdp_addr(void);
@@ -135,3 +133,5 @@ int count_immovable_mem_regions(void);
#else
static inline int count_immovable_mem_regions(void) { return 0; }
#endif
+
+#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8baa441d8000..386151b2c62f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2307,7 +2307,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(55, (unsigned long *)&status)) {
handled++;
- intel_pt_interrupt();
+ if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
+ perf_guest_cbs->handle_intel_pt_intr))
+ perf_guest_cbs->handle_intel_pt_intr();
+ else
+ intel_pt_interrupt();
}
/*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ce95b8cbd229..0e56ff7e4848 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -112,8 +112,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
test_cpu_cap(c, bit))
#define this_cpu_has(bit) \
- (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
- x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+ x86_this_cpu_test_bit(bit, \
+ (unsigned long __percpu *)&cpu_info.x86_capability))
/*
* This macro is for detection of features which need kernel
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 62be73b23d5c..e8f58ddd06d9 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -10,6 +10,7 @@ extern struct e820_table *e820_table_firmware;
extern unsigned long pci_mem_start;
+extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 93c4bf598fb0..feab24cac610 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -226,7 +226,9 @@ struct x86_emulate_ops {
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
- int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
+ int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
+ const char *smstate);
+ void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a5db4475e72d..2532bfb09d8b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
}
#define KVM_PERMILLE_MMU_PAGES 20
-#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
@@ -253,14 +253,14 @@ struct kvm_mmu_memory_cache {
* kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
* by indirect shadow page can not be more than 15 bits.
*
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
* @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
*/
union kvm_mmu_page_role {
u32 word;
struct {
unsigned level:4;
- unsigned cr4_pae:1;
+ unsigned gpte_is_8_bytes:1;
unsigned quadrant:2;
unsigned direct:1;
unsigned access:3;
@@ -350,6 +350,7 @@ struct kvm_mmu_page {
};
struct kvm_pio_request {
+ unsigned long linear_rip;
unsigned long count;
int in;
int port;
@@ -468,6 +469,7 @@ struct kvm_pmu {
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
@@ -568,6 +570,7 @@ struct kvm_vcpu_arch {
bool tpr_access_reporting;
u64 ia32_xss;
u64 microcode_version;
+ u64 arch_capabilities;
/*
* Paging state of the vcpu
@@ -778,6 +781,9 @@ struct kvm_vcpu_arch {
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
+
+ /* AMD MSRC001_0015 Hardware Configuration */
+ u64 msr_hwcr;
};
struct kvm_lpage_info {
@@ -842,9 +848,9 @@ enum kvm_irqchip_mode {
};
struct kvm_arch {
- unsigned int n_used_mmu_pages;
- unsigned int n_requested_mmu_pages;
- unsigned int n_max_mmu_pages;
+ unsigned long n_used_mmu_pages;
+ unsigned long n_requested_mmu_pages;
+ unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
@@ -1165,7 +1171,8 @@ struct kvm_x86_ops {
uint32_t guest_irq, bool set);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
- int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
void (*setup_mce)(struct kvm_vcpu *vcpu);
@@ -1180,7 +1187,7 @@ struct kvm_x86_ops {
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
+ int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
@@ -1192,6 +1199,8 @@ struct kvm_x86_ops {
int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
+
+ bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
@@ -1252,8 +1261,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
bool pdptrs_changed(struct kvm_vcpu *vcpu);
@@ -1588,4 +1597,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ca5bc0eacb95..3bf5d84bb64f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -781,6 +781,14 @@
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+/* PERF_GLOBAL_OVF_CTL bits */
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT)
+
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 63b3393bd98e..c53682303c9c 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -77,7 +77,11 @@ static inline size_t real_mode_size_needed(void)
return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
}
-void set_real_mode_mem(phys_addr_t mem, size_t size);
+static inline void set_real_mode_mem(phys_addr_t mem)
+{
+ real_mode_header = (struct real_mode_header *) __va(mem);
+}
+
void reserve_real_mode(void);
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d653139857af..4c305471ec33 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+ memcpy(args, &regs->bx, 6 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
@@ -116,124 +114,50 @@ static inline int syscall_get_arch(void)
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->bx;
- case 1:
- if (!n--) break;
- *args++ = regs->cx;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->si;
- case 4:
- if (!n--) break;
- *args++ = regs->di;
- case 5:
- if (!n--) break;
- *args++ = regs->bp;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ *args++ = regs->bx;
+ *args++ = regs->cx;
+ *args++ = regs->dx;
+ *args++ = regs->si;
+ *args++ = regs->di;
+ *args = regs->bp;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- *args++ = regs->di;
- case 1:
- if (!n--) break;
- *args++ = regs->si;
- case 2:
- if (!n--) break;
- *args++ = regs->dx;
- case 3:
- if (!n--) break;
- *args++ = regs->r10;
- case 4:
- if (!n--) break;
- *args++ = regs->r8;
- case 5:
- if (!n--) break;
- *args++ = regs->r9;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ *args++ = regs->di;
+ *args++ = regs->si;
+ *args++ = regs->dx;
+ *args++ = regs->r10;
+ *args++ = regs->r8;
+ *args = regs->r9;
+ }
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread_info.status & TS_COMPAT)
- switch (i) {
- case 0:
- if (!n--) break;
- regs->bx = *args++;
- case 1:
- if (!n--) break;
- regs->cx = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->si = *args++;
- case 4:
- if (!n--) break;
- regs->di = *args++;
- case 5:
- if (!n--) break;
- regs->bp = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
- else
+ if (task->thread_info.status & TS_COMPAT) {
+ regs->bx = *args++;
+ regs->cx = *args++;
+ regs->dx = *args++;
+ regs->si = *args++;
+ regs->di = *args++;
+ regs->bp = *args;
+ } else
# endif
- switch (i) {
- case 0:
- if (!n--) break;
- regs->di = *args++;
- case 1:
- if (!n--) break;
- regs->si = *args++;
- case 2:
- if (!n--) break;
- regs->dx = *args++;
- case 3:
- if (!n--) break;
- regs->r10 = *args++;
- case 4:
- if (!n--) break;
- regs->r8 = *args++;
- case 5:
- if (!n--) break;
- regs->r9 = *args++;
- case 6:
- if (!n--) break;
- default:
- BUG();
- break;
- }
+ {
+ regs->di = *args++;
+ regs->si = *args++;
+ regs->dx = *args++;
+ regs->r10 = *args++;
+ regs->r8 = *args++;
+ regs->r9 = *args;
+ }
}
static inline int syscall_get_arch(void)
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index de6f0d59a24f..2863c2026655 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -206,6 +206,9 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+ if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
+ return -EINVAL;
+
asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
: [thunk_target] "a" (&hypercall_page[call])
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index f0b0c90dd398..d213ec5c3766 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,6 +146,7 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
+#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f33f11f69078..1573a0a6b525 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -501,11 +501,8 @@ out_unlock:
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
- struct rdt_resource *r;
int cpu;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
-
cpu = cpumask_any(&dom->cpu_mask);
dom->cqm_work_cpu = cpu;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2879e234e193..76dd605ee2a3 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -73,12 +73,13 @@ EXPORT_SYMBOL(pci_mem_start);
* This function checks if any part of the range <start,end> is mapped
* with type.
*/
-bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
+static bool _e820__mapped_any(struct e820_table *table,
+ u64 start, u64 end, enum e820_type type)
{
int i;
- for (i = 0; i < e820_table->nr_entries; i++) {
- struct e820_entry *entry = &e820_table->entries[i];
+ for (i = 0; i < table->nr_entries; i++) {
+ struct e820_entry *entry = &table->entries[i];
if (type && entry->type != type)
continue;
@@ -88,6 +89,17 @@ bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
}
return 0;
}
+
+bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
+{
+ return _e820__mapped_any(e820_table_firmware, start, end, type);
+}
+EXPORT_SYMBOL_GPL(e820__mapped_raw_any);
+
+bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
+{
+ return _e820__mapped_any(e820_table, start, end, type);
+}
EXPORT_SYMBOL_GPL(e820__mapped_any);
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index fd3951638ae4..fc8ccf596624 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -962,13 +962,13 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
return 1;
- eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
- ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ eax = kvm_rax_read(vcpu);
+ ecx = kvm_rcx_read(vcpu);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
- kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
- kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
- kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
- kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+ kvm_rax_write(vcpu, eax);
+ kvm_rbx_write(vcpu, ebx);
+ kvm_rcx_write(vcpu, ecx);
+ kvm_rdx_write(vcpu, edx);
return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c338984c850d..d0d5dd44b4f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2331,24 +2331,18 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
+#ifdef CONFIG_X86_64
u32 eax, ebx, ecx, edx;
eax = 0x80000001;
ecx = 0;
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
return edx & bit(X86_FEATURE_LM);
+#else
+ return false;
+#endif
}
-#define GET_SMSTATE(type, smbase, offset) \
- ({ \
- type __val; \
- int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
- sizeof(__val)); \
- if (r != X86EMUL_CONTINUE) \
- return X86EMUL_UNHANDLEABLE; \
- __val; \
- })
-
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
@@ -2361,27 +2355,30 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
desc->type = (flags >> 8) & 15;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
u16 selector;
- selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+ selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
return X86EMUL_CONTINUE;
}
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
@@ -2390,15 +2387,16 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
offset = 0x7e00 + n * 16;
- selector = GET_SMSTATE(u16, smbase, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- base3 = GET_SMSTATE(u32, smbase, offset + 12);
+ selector = GET_SMSTATE(u16, smstate, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ base3 = GET_SMSTATE(u32, smstate, offset + 12);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
return X86EMUL_CONTINUE;
}
+#endif
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
u64 cr0, u64 cr3, u64 cr4)
@@ -2445,7 +2443,8 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
}
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
@@ -2453,53 +2452,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+ cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
+ cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
- val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ val = GET_SMSTATE(u32, smstate, 0x7fcc);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ val = GET_SMSTATE(u32, smstate, 0x7fc8);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- selector = GET_SMSTATE(u32, smbase, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
- selector = GET_SMSTATE(u32, smbase, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
ctxt->ops->set_idt(ctxt, &dt);
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smbase, i);
+ int r = rsm_load_seg_32(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+ cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
@@ -2509,43 +2510,43 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
- ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u32, smbase, 0x7f68);
+ val = GET_SMSTATE(u32, smstate, 0x7f68);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7f60);
+ val = GET_SMSTATE(u32, smstate, 0x7f60);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
- cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
- val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
+ cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ val = GET_SMSTATE(u64, smstate, 0x7ed0);
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
- selector = GET_SMSTATE(u32, smbase, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
- base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smbase, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
- base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
@@ -2553,37 +2554,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
return r;
for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
return X86EMUL_CONTINUE;
}
+#endif
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
+ char buf[512];
u64 smbase;
int ret;
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
+ smbase = ctxt->ops->get_smbase(ctxt);
+
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
+
/*
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
*/
- cr4 = ctxt->ops->get_cr(ctxt, 4);
if (emulator_has_longmode(ctxt)) {
struct desc_struct cs_desc;
/* Zero CR4.PCIDE before CR0.PG. */
- if (cr4 & X86_CR4_PCIDE) {
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PCIDE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
- cr4 &= ~X86_CR4_PCIDE;
- }
/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
@@ -2597,39 +2610,39 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
- /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
- if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
- /* And finally go back to 32-bit mode. */
- efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ if (emulator_has_longmode(ctxt)) {
+ /* Clear CR4.PAE before clearing EFER.LME. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
- smbase = ctxt->ops->get_smbase(ctxt);
+ /* And finally go back to 32-bit mode. */
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ }
/*
* Give pre_leave_smm() a chance to make ISA-specific changes to the
* vCPU state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (ctxt->ops->pre_leave_smm(ctxt, smbase))
+ if (ctxt->ops->pre_leave_smm(ctxt, buf))
return X86EMUL_UNHANDLEABLE;
+#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, smbase + 0x8000);
+ ret = rsm_load_state_64(ctxt, buf);
else
- ret = rsm_load_state_32(ctxt, smbase + 0x8000);
+#endif
+ ret = rsm_load_state_32(ctxt, buf);
if (ret != X86EMUL_CONTINUE) {
/* FIXME: should triple fault */
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
+ ctxt->ops->post_leave_smm(ctxt);
- ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
- ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 27c43525a05f..7868daceb25b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -526,7 +526,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
new_config.enable = 0;
stimer->config.as_uint64 = new_config.as_uint64;
- stimer_mark_pending(stimer, false);
+ if (stimer->config.enable)
+ stimer_mark_pending(stimer, false);
+
return 0;
}
@@ -542,7 +544,10 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
stimer->config.enable = 0;
else if (stimer->config.auto_enable)
stimer->config.enable = 1;
- stimer_mark_pending(stimer, false);
+
+ if (stimer->config.enable)
+ stimer_mark_pending(stimer, false);
+
return 0;
}
@@ -1521,10 +1526,10 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
longmode = is_64_bit_mode(vcpu);
if (longmode)
- kvm_register_write(vcpu, VCPU_REGS_RAX, result);
+ kvm_rax_write(vcpu, result);
else {
- kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
- kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
+ kvm_rdx_write(vcpu, result >> 32);
+ kvm_rax_write(vcpu, result & 0xffffffff);
}
}
@@ -1597,18 +1602,18 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
longmode = is_64_bit_mode(vcpu);
if (!longmode) {
- param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
- ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
- outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+ param = ((u64)kvm_rdx_read(vcpu) << 32) |
+ (kvm_rax_read(vcpu) & 0xffffffff);
+ ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
+ (kvm_rcx_read(vcpu) & 0xffffffff);
+ outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
+ (kvm_rsi_read(vcpu) & 0xffffffff);
}
#ifdef CONFIG_X86_64
else {
- param = kvm_register_read(vcpu, VCPU_REGS_RCX);
- ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
- outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+ param = kvm_rcx_read(vcpu);
+ ingpa = kvm_rdx_read(vcpu);
+ outgpa = kvm_r8_read(vcpu);
}
#endif
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f8f56a93358b..1cc6c47dc77e 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,6 +9,34 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
+{ \
+ return vcpu->arch.regs[VCPU_REGS_##uname]; \
+} \
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
+ unsigned long val) \
+{ \
+ vcpu->arch.regs[VCPU_REGS_##uname] = val; \
+}
+BUILD_KVM_GPR_ACCESSORS(rax, RAX)
+BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
+BUILD_KVM_GPR_ACCESSORS(rcx, RCX)
+BUILD_KVM_GPR_ACCESSORS(rdx, RDX)
+BUILD_KVM_GPR_ACCESSORS(rbp, RBP)
+BUILD_KVM_GPR_ACCESSORS(rsi, RSI)
+BUILD_KVM_GPR_ACCESSORS(rdi, RDI)
+#ifdef CONFIG_X86_64
+BUILD_KVM_GPR_ACCESSORS(r8, R8)
+BUILD_KVM_GPR_ACCESSORS(r9, R9)
+BUILD_KVM_GPR_ACCESSORS(r10, R10)
+BUILD_KVM_GPR_ACCESSORS(r11, R11)
+BUILD_KVM_GPR_ACCESSORS(r12, R12)
+BUILD_KVM_GPR_ACCESSORS(r13, R13)
+BUILD_KVM_GPR_ACCESSORS(r14, R14)
+BUILD_KVM_GPR_ACCESSORS(r15, R15)
+#endif
+
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
@@ -37,6 +65,16 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
+static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu)
+{
+ return kvm_register_read(vcpu, VCPU_REGS_RSP);
+}
+
+static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ kvm_register_write(vcpu, VCPU_REGS_RSP, val);
+}
+
static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
might_sleep(); /* on svm */
@@ -83,8 +121,8 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
- return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
- | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+ return (kvm_rax_read(vcpu) & -1u)
+ | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32);
}
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 991fdf7fc17f..4e000712cb82 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -138,6 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
if (offset <= max_apic_id) {
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+ offset = array_index_nospec(offset, map->max_apic_id + 1);
*cluster = &map->phys_map[offset];
*mask = dest_id & (0xffff >> (16 - cluster_size));
} else {
@@ -901,7 +902,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
if (irq->dest_id > map->max_apic_id) {
*bitmap = 0;
} else {
- *dst = &map->phys_map[irq->dest_id];
+ u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
+ *dst = &map->phys_map[dest_id];
*bitmap = 1;
}
return true;
@@ -1453,7 +1455,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
if (swait_active(q))
swake_up_one(q);
- if (apic_lvtt_tscdeadline(apic))
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
ktimer->expired_tscdeadline = ktimer->tscdeadline;
}
@@ -1671,37 +1673,42 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
static bool start_hv_timer(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
- int r;
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ bool expired;
WARN_ON(preemptible());
if (!kvm_x86_ops->set_hv_timer)
return false;
- if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
- return false;
-
if (!ktimer->tscdeadline)
return false;
- r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
- if (r < 0)
+ if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
return false;
ktimer->hv_timer_in_use = true;
hrtimer_cancel(&ktimer->timer);
/*
- * Also recheck ktimer->pending, in case the sw timer triggered in
- * the window. For periodic timer, leave the hv timer running for
- * simplicity, and the deadline will be recomputed on the next vmexit.
+ * To simplify handling the periodic timer, leave the hv timer running
+ * even if the deadline timer has expired, i.e. rely on the resulting
+ * VM-Exit to recompute the periodic timer's target expiration.
*/
- if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
- if (r)
+ if (!apic_lvtt_period(apic)) {
+ /*
+ * Cancel the hv timer if the sw timer fired while the hv timer
+ * was being programmed, or if the hv timer itself expired.
+ */
+ if (atomic_read(&ktimer->pending)) {
+ cancel_hv_timer(apic);
+ } else if (expired) {
apic_timer_expired(apic);
- return false;
+ cancel_hv_timer(apic);
+ }
}
- trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
+ trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
+
return true;
}
@@ -1725,8 +1732,13 @@ static void start_sw_timer(struct kvm_lapic *apic)
static void restart_apic_timer(struct kvm_lapic *apic)
{
preempt_disable();
+
+ if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
+ goto out;
+
if (!start_hv_timer(apic))
start_sw_timer(apic);
+out:
preempt_enable();
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7837ab001d80..f7c2dee452a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pat.h>
#include <asm/cmpxchg.h>
+#include <asm/e820/api.h>
#include <asm/io.h>
#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
@@ -182,7 +183,7 @@ struct kvm_shadow_walk_iterator {
static const union kvm_mmu_page_role mmu_base_role_mask = {
.cr0_wp = 1,
- .cr4_pae = 1,
+ .gpte_is_8_bytes = 1,
.nxe = 1,
.smep_andnot_wp = 1,
.smap_andnot_wp = 1,
@@ -487,16 +488,24 @@ static void kvm_mmu_reset_all_pte_masks(void)
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
+ *
+ * Some Intel CPUs address the L1 cache using more PA bits than are
+ * reported by CPUID. Use the PA width of the L1 cache when possible
+ * to achieve more effective mitigation, e.g. if system RAM overlaps
+ * the most significant bits of legal physical address space.
*/
- low_phys_bits = boot_cpu_data.x86_phys_bits;
- if (boot_cpu_data.x86_phys_bits <
+ shadow_nonpresent_or_rsvd_mask = 0;
+ low_phys_bits = boot_cpu_data.x86_cache_bits;
+ if (boot_cpu_data.x86_cache_bits <
52 - shadow_nonpresent_or_rsvd_mask_len) {
shadow_nonpresent_or_rsvd_mask =
- rsvd_bits(boot_cpu_data.x86_phys_bits -
+ rsvd_bits(boot_cpu_data.x86_cache_bits -
shadow_nonpresent_or_rsvd_mask_len,
- boot_cpu_data.x86_phys_bits - 1);
+ boot_cpu_data.x86_cache_bits - 1);
low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
- }
+ } else
+ WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+
shadow_nonpresent_or_rsvd_lower_gfn_mask =
GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
}
@@ -2007,7 +2016,7 @@ static int is_empty_shadow_page(u64 *spt)
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
@@ -2205,6 +2214,7 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
+
#define for_each_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
@@ -2215,12 +2225,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
for_each_valid_sp(_kvm, _sp, _gfn) \
if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
+static inline bool is_ept_sp(struct kvm_mmu_page *sp)
+{
+ return sp->role.cr0_wp && sp->role.smap_andnot_wp;
+}
+
/* @sp->gfn should be write-protected at the call site */
static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
- if (sp->role.cr4_pae != !!is_pae(vcpu)
- || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
+ if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) ||
+ vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}
@@ -2232,7 +2247,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
struct list_head *invalid_list,
bool remote_flush)
{
- if (!remote_flush && !list_empty(invalid_list))
+ if (!remote_flush && list_empty(invalid_list))
return false;
if (!list_empty(invalid_list))
@@ -2423,7 +2438,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role.level = level;
role.direct = direct;
if (role.direct)
- role.cr4_pae = 0;
+ role.gpte_is_8_bytes = true;
role.access = access;
if (!vcpu->arch.mmu->direct_map
&& vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
@@ -2757,7 +2772,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
* Changing the number of mmu pages allocated to the vm
* Note: if goal_nr_mmu_pages is too small, you will get dead lock
*/
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
@@ -2886,7 +2901,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
*/
(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
- return true;
+ return !e820__mapped_raw_any(pfn_to_hpa(pfn),
+ pfn_to_hpa(pfn + 1) - 1,
+ E820_TYPE_RAM);
}
/* Bits which may be returned by set_spte() */
@@ -4794,7 +4811,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
role.base.access = ACC_ALL;
role.base.nxe = !!is_nx(vcpu);
- role.base.cr4_pae = !!is_pae(vcpu);
role.base.cr0_wp = is_write_protection(vcpu);
role.base.smm = is_smm(vcpu);
role.base.guest_mode = is_guest_mode(vcpu);
@@ -4815,6 +4831,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
role.base.ad_disabled = (shadow_accessed_mask == 0);
role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
role.base.direct = true;
+ role.base.gpte_is_8_bytes = true;
return role;
}
@@ -4879,6 +4896,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
role.base.smap_andnot_wp = role.ext.cr4_smap &&
!is_write_protection(vcpu);
role.base.direct = !is_paging(vcpu);
+ role.base.gpte_is_8_bytes = !!is_pae(vcpu);
if (!is_long_mode(vcpu))
role.base.level = PT32E_ROOT_LEVEL;
@@ -4918,18 +4936,26 @@ static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
bool execonly)
{
- union kvm_mmu_role role;
+ union kvm_mmu_role role = {0};
- /* Base role is inherited from root_mmu */
- role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
- role.ext = kvm_calc_mmu_role_ext(vcpu);
+ /* SMM flag is inherited from root_mmu */
+ role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
role.base.level = PT64_ROOT_4LEVEL;
+ role.base.gpte_is_8_bytes = true;
role.base.direct = false;
role.base.ad_disabled = !accessed_dirty;
role.base.guest_mode = true;
role.base.access = ACC_ALL;
+ /*
+ * WP=1 and NOT_WP=1 is an impossible combination, use WP and the
+ * SMAP variation to denote shadow EPT entries.
+ */
+ role.base.cr0_wp = true;
+ role.base.smap_andnot_wp = true;
+
+ role.ext = kvm_calc_mmu_role_ext(vcpu);
role.ext.execonly = execonly;
return role;
@@ -5179,7 +5205,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
gpa, bytes, sp->role.word);
offset = offset_in_page(gpa);
- pte_size = sp->role.cr4_pae ? 8 : 4;
+ pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
/*
* Sometimes, the OS only writes the last one bytes to update status
@@ -5203,7 +5229,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
page_offset = offset_in_page(gpa);
level = sp->role.level;
*nspte = 1;
- if (!sp->role.cr4_pae) {
+ if (!sp->role.gpte_is_8_bytes) {
page_offset <<= 1; /* 32->64 */
/*
* A 32-bit pde maps 4MB while the shadow pdes map
@@ -5393,10 +5419,12 @@ emulate:
* This can happen if a guest gets a page-fault on data access but the HW
* table walker is not able to read the instruction page (e.g instruction
* page is not present in memory). In those cases we simply restart the
- * guest.
+ * guest, with the exception of AMD Erratum 1096 which is unrecoverable.
*/
- if (unlikely(insn && !insn_len))
- return 1;
+ if (unlikely(insn && !insn_len)) {
+ if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu))
+ return 1;
+ }
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
@@ -5509,7 +5537,9 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
if (flush && lock_flush_tlb) {
- kvm_flush_remote_tlbs(kvm);
+ kvm_flush_remote_tlbs_with_address(kvm,
+ start_gfn,
+ iterator.gfn - start_gfn + 1);
flush = false;
}
cond_resched_lock(&kvm->mmu_lock);
@@ -5517,7 +5547,8 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
}
if (flush && lock_flush_tlb) {
- kvm_flush_remote_tlbs(kvm);
+ kvm_flush_remote_tlbs_with_address(kvm, start_gfn,
+ end_gfn - start_gfn + 1);
flush = false;
}
@@ -6011,10 +6042,10 @@ out:
/*
* Calculate mmu pages needed for kvm.
*/
-unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
{
- unsigned int nr_mmu_pages;
- unsigned int nr_pages = 0;
+ unsigned long nr_mmu_pages;
+ unsigned long nr_pages = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i;
@@ -6027,8 +6058,7 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
}
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
- nr_mmu_pages = max(nr_mmu_pages,
- (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+ nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bbdc60f2fae8..54c2a377795b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
-static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 9f6c855a0043..dd30dccd2ad5 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -29,10 +29,10 @@
\
role.word = __entry->role; \
\
- trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s" \
+ trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s" \
" %snxe %sad root %u %s%c", \
__entry->gfn, role.level, \
- role.cr4_pae ? " pae" : "", \
+ role.gpte_is_8_bytes ? 8 : 4, \
role.quadrant, \
role.direct ? " direct" : "", \
access_str[role.access], \
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e9ea2d45ae66..9f72cc427158 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -48,11 +48,6 @@ static bool msr_mtrr_valid(unsigned msr)
return false;
}
-static bool valid_pat_type(unsigned t)
-{
- return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
-}
-
static bool valid_mtrr_type(unsigned t)
{
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
@@ -67,10 +62,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return false;
if (msr == MSR_IA32_CR_PAT) {
- for (i = 0; i < 8; i++)
- if (!valid_pat_type((data >> (i * 8)) & 0xff))
- return false;
- return true;
+ return kvm_pat_valid(data);
} else if (msr == MSR_MTRRdefType) {
if (data & ~0xcff)
return false;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bdca39829bc..c40af67d0f44 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -141,15 +141,35 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
struct page *page;
npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
- /* Check if the user is doing something meaningless. */
- if (unlikely(npages != 1))
- return -EFAULT;
-
- table = kmap_atomic(page);
- ret = CMPXCHG(&table[index], orig_pte, new_pte);
- kunmap_atomic(table);
-
- kvm_release_page_dirty(page);
+ if (likely(npages == 1)) {
+ table = kmap_atomic(page);
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+ kunmap_atomic(table);
+
+ kvm_release_page_dirty(page);
+ } else {
+ struct vm_area_struct *vma;
+ unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
+ unsigned long pfn;
+ unsigned long paddr;
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
+ if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+ pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ paddr = pfn << PAGE_SHIFT;
+ table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
+ if (!table) {
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+ memunmap(table);
+ up_read(&current->mm->mmap_sem);
+ }
return (ret != orig_pte);
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58ead7db71a3..e39741997893 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
u64 ctr_val;
+ if (!pmu->version)
+ return 1;
+
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b5b128a0a051..893686cb0044 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -262,6 +262,7 @@ struct amd_svm_iommu_ir {
};
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
@@ -2090,7 +2091,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
init_vmcb(svm);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
- kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+ kvm_rdx_write(vcpu, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
@@ -2692,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm)
static int db_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
@@ -2702,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->nmi_singlestep) {
disable_nmi_singlestep(svm);
+ /* Make sure we check for pending NMIs upon entry */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
if (svm->vcpu.guest_debug &
@@ -3067,32 +3071,6 @@ static inline bool nested_svm_nmi(struct vcpu_svm *svm)
return false;
}
-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
-{
- struct page *page;
-
- might_sleep();
-
- page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
- goto error;
-
- *_page = page;
-
- return kmap(page);
-
-error:
- kvm_inject_gp(&svm->vcpu, 0);
-
- return NULL;
-}
-
-static void nested_svm_unmap(struct page *page)
-{
- kunmap(page);
- kvm_release_page_dirty(page);
-}
-
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
unsigned port, size, iopm_len;
@@ -3295,10 +3273,11 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
static int nested_svm_vmexit(struct vcpu_svm *svm)
{
+ int rc;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ struct kvm_host_map map;
trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
vmcb->control.exit_info_1,
@@ -3307,9 +3286,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
- nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
- if (!nested_vmcb)
+ rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
@@ -3404,16 +3388,16 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
} else {
(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
}
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+ kvm_rax_write(&svm->vcpu, hsave->save.rax);
+ kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
+ kvm_rip_write(&svm->vcpu, hsave->save.rip);
svm->vmcb->save.dr7 = 0;
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
mark_all_dirty(svm->vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
@@ -3479,7 +3463,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
}
static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct page *page)
+ struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
@@ -3512,9 +3496,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
kvm_mmu_reset_context(&svm->vcpu);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
+ kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
+ kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
svm->vmcb->save.rax = nested_vmcb->save.rax;
@@ -3563,7 +3547,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->control.pause_filter_thresh =
nested_vmcb->control.pause_filter_thresh;
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, map, true);
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -3583,17 +3567,23 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
+ int rc;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ struct kvm_host_map map;
u64 vmcb_gpa;
vmcb_gpa = svm->vmcb->save.rax;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return false;
+ }
+
+ nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
@@ -3601,7 +3591,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return false;
}
@@ -3645,7 +3635,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page);
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
return true;
}
@@ -3669,21 +3659,26 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
int ret;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ if (ret) {
+ if (ret == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return ret;
}
@@ -3691,21 +3686,26 @@ static int vmload_interception(struct vcpu_svm *svm)
static int vmsave_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
int ret;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ if (ret) {
+ if (ret == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return ret;
}
@@ -3787,11 +3787,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
- kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
+ kvm_rax_read(&svm->vcpu));
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
- kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3799,7 +3799,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
static int skinit_interception(struct vcpu_svm *svm)
{
- trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3813,7 +3813,7 @@ static int wbinvd_interception(struct vcpu_svm *svm)
static int xsetbv_interception(struct vcpu_svm *svm)
{
u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
- u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 index = kvm_rcx_read(&svm->vcpu);
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
@@ -4209,7 +4209,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int rdmsr_interception(struct vcpu_svm *svm)
{
- u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(&svm->vcpu);
struct msr_data msr_info;
msr_info.index = ecx;
@@ -4221,10 +4221,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
} else {
trace_kvm_msr_read(ecx, msr_info.data);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
- msr_info.data & 0xffffffff);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
- msr_info.data >> 32);
+ kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
+ kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -4418,7 +4416,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
{
struct msr_data msr;
- u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(&svm->vcpu);
u64 data = kvm_read_edx_eax(&svm->vcpu);
msr.data = data;
@@ -4517,14 +4515,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
- * Update ICR high and low, then emulate sending IPI,
- * which is handled when writing APIC_ICR.
+ * At this point, we expect that the AVIC HW has already
+ * set the appropriate IRR bits on the valid target
+ * vcpus. So, we just need to kick the appropriate vcpu.
*/
- kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
- kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ bool m = kvm_apic_match_dest(vcpu, apic,
+ icrl & KVM_APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & KVM_APIC_DEST_MASK);
+
+ if (m && !avic_vcpu_is_running(vcpu))
+ kvm_vcpu_wake_up(vcpu);
+ }
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
@@ -4596,7 +4605,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
if (entry)
- WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
+ clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
@@ -5621,6 +5630,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
+ kvm_load_guest_xcr0(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -5766,6 +5776,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
+ kvm_put_guest_xcr0(vcpu);
stgi();
/* Any pending NMI will happen here */
@@ -6215,32 +6226,24 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
- struct page *page;
- struct {
- u64 guest;
- u64 vmcb;
- } svm_state_save;
- int ret;
+ struct kvm_host_map map;
+ u64 guest;
+ u64 vmcb;
- ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
- sizeof(svm_state_save));
- if (ret)
- return ret;
+ guest = GET_SMSTATE(u64, smstate, 0x7ed8);
+ vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (svm_state_save.guest) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
- nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
- if (nested_vmcb)
- enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
- else
- ret = 1;
- vcpu->arch.hflags |= HF_SMM_MASK;
+ if (guest) {
+ if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
+ return 1;
+ nested_vmcb = map.hva;
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
}
- return ret;
+ return 0;
}
static int enable_smi_window(struct kvm_vcpu *vcpu)
@@ -6422,11 +6425,11 @@ e_free:
return ret;
}
-static int get_num_contig_pages(int idx, struct page **inpages,
- unsigned long npages)
+static unsigned long get_num_contig_pages(unsigned long idx,
+ struct page **inpages, unsigned long npages)
{
unsigned long paddr, next_paddr;
- int i = idx + 1, pages = 1;
+ unsigned long i = idx + 1, pages = 1;
/* find the number of contiguous pages starting from idx */
paddr = __sme_page_pa(inpages[idx]);
@@ -6445,12 +6448,12 @@ static int get_num_contig_pages(int idx, struct page **inpages,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data *data;
struct page **inpages;
- int i, ret, pages;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6799,7 +6802,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
- int ret, size;
+ unsigned int size;
+ int ret;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6807,6 +6811,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
return -EFAULT;
+ if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
+ return -EINVAL;
+ if (!debug.dst_uaddr)
+ return -EINVAL;
+
vaddr = debug.src_uaddr;
size = debug.len;
vaddr_end = vaddr + size;
@@ -6857,8 +6866,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
dst_vaddr,
len, &argp->error);
- sev_unpin_memory(kvm, src_p, 1);
- sev_unpin_memory(kvm, dst_p, 1);
+ sev_unpin_memory(kvm, src_p, n);
+ sev_unpin_memory(kvm, dst_p, n);
if (ret)
goto err;
@@ -7098,6 +7107,36 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
return -ENODEV;
}
+static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+{
+ bool is_user, smap;
+
+ is_user = svm_get_cpl(vcpu) == 3;
+ smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+
+ /*
+ * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+ *
+ * In non SEV guest, hypervisor will be able to read the guest
+ * memory to decode the instruction pointer when insn_len is zero
+ * so we return true to indicate that decoding is possible.
+ *
+ * But in the SEV guest, the guest memory is encrypted with the
+ * guest specific key and hypervisor will not be able to decode the
+ * instruction pointer so we will not able to workaround it. Lets
+ * print the error and request to kill the guest.
+ */
+ if (is_user && smap) {
+ if (!sev_guest(vcpu->kvm))
+ return true;
+
+ pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ }
+
+ return false;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7231,6 +7270,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.nested_enable_evmcs = nested_enable_evmcs,
.nested_get_evmcs_version = nested_get_evmcs_version,
+
+ .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6432d08c7de7..4d47a2631d1f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
- __field( __u8, tm )
+ __field( __u16, tm )
__field( __u8, vec )
),
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 854e144131c6..d6664ee3d127 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -2,6 +2,8 @@
#ifndef __KVM_X86_VMX_CAPS_H
#define __KVM_X86_VMX_CAPS_H
+#include <asm/vmx.h>
+
#include "lapic.h"
extern bool __read_mostly enable_vpid;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f24a2c225070..f4b1ae439763 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -193,10 +193,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
if (!vmx->nested.hv_evmcs)
return;
- kunmap(vmx->nested.hv_evmcs_page);
- kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
vmx->nested.hv_evmcs_vmptr = -1ull;
- vmx->nested.hv_evmcs_page = NULL;
vmx->nested.hv_evmcs = NULL;
}
@@ -229,16 +227,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
- if (vmx->nested.virtual_apic_page) {
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- }
+ kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+ vmx->nested.pi_desc = NULL;
kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
@@ -500,6 +491,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
}
}
+static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) {
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap[word] = ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+}
+
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@@ -508,88 +510,95 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
int msr;
- struct page *page;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
- /*
- * pred_cmd & spec_ctrl are trying to verify two things:
- *
- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
- * ensures that we do not accidentally generate an L02 MSR bitmap
- * from the L12 MSR bitmap that is too permissive.
- * 2. That L1 or L2s have actually used the MSR. This avoids
- * unnecessarily merging of the bitmap if the MSR is unused. This
- * works properly because we only update the L01 MSR bitmap lazily.
- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
- * updated to reflect this when L1 (or its L2s) actually write to
- * the MSR.
- */
- bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
- bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return false;
- if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
- !pred_cmd && !spec_ctrl)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
return false;
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
- if (is_error_page(page))
- return false;
+ msr_bitmap_l1 = (unsigned long *)map->hva;
- msr_bitmap_l1 = (unsigned long *)kmap(page);
- if (nested_cpu_has_apic_reg_virt(vmcs12)) {
- /*
- * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
- * just lets the processor take the value from the virtual-APIC page;
- * take those 256 bits directly from the L1 bitmap.
- */
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = msr_bitmap_l1[word];
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- } else {
- for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap_l0[word] = ~0;
- msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
- }
- }
+ /*
+ * To keep the control flow simple, pay eight 8-byte writes (sixteen
+ * 4-byte writes on 32-bit systems) up front to enable intercepts for
+ * the x2APIC MSR range and selectively disable them below.
+ */
+ enable_x2apic_msr_intercepts(msr_bitmap_l0);
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_TASKPRI),
- MSR_TYPE_W);
+ if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800
+ * and 0x8ff, it just lets the processor take the value
+ * from the virtual-APIC page; take those 256 bits
+ * directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ }
+ }
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_EOI),
- MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
- X2APIC_MSR(APIC_SELF_IPI),
- MSR_TYPE_W);
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (nested_cpu_has_vid(vmcs12)) {
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
+ }
}
- if (spec_ctrl)
+ /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+ nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+ MSR_FS_BASE, MSR_TYPE_RW);
+
+ nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+ MSR_GS_BASE, MSR_TYPE_RW);
+
+ nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+ MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+
+ /*
+ * Checking the L0->L1 bitmap is trying to verify two things:
+ *
+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+ * ensures that we do not accidentally generate an L02 MSR bitmap
+ * from the L12 MSR bitmap that is too permissive.
+ * 2. That L1 or L2s have actually used the MSR. This avoids
+ * unnecessarily merging of the bitmap if the MSR is unused. This
+ * works properly because we only update the L01 MSR bitmap lazily.
+ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+ * updated to reflect this when L1 (or its L2s) actually write to
+ * the MSR.
+ */
+ if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_SPEC_CTRL,
MSR_TYPE_R | MSR_TYPE_W);
- if (pred_cmd)
+ if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PRED_CMD,
MSR_TYPE_W);
- kunmap(page);
- kvm_release_page_clean(page);
+ kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
return true;
}
@@ -597,20 +606,20 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
+ struct kvm_host_map map;
struct vmcs12 *shadow;
- struct page *page;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == -1ull)
return;
shadow = get_shadow_vmcs12(vcpu);
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
- memcpy(shadow, kmap(page), VMCS12_SIZE);
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+ return;
- kunmap(page);
- kvm_release_page_clean(page);
+ memcpy(shadow, map.hva, VMCS12_SIZE);
+ kvm_vcpu_unmap(vcpu, &map, false);
}
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
@@ -914,7 +923,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
if (!nested_cr3_valid(vcpu, cr3)) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
- return 1;
+ return -EINVAL;
}
/*
@@ -925,7 +934,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
!nested_ept) {
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
*entry_failure_code = ENTRY_FAIL_PDPTE;
- return 1;
+ return -EINVAL;
}
}
}
@@ -1778,13 +1787,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
nested_release_evmcs(vcpu);
- vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
- vcpu, assist_page.current_nested_vmcs);
-
- if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs),
+ &vmx->nested.hv_evmcs_map))
return 0;
- vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
+ vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
/*
* Currently, KVM only supports eVMCS version 1
@@ -2357,19 +2364,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
*/
if (vmx->emulation_required) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
- return 1;
+ return -EINVAL;
}
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
- return 1;
+ return -EINVAL;
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
- kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
- kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
+ kvm_rsp_write(vcpu, vmcs12->guest_rsp);
+ kvm_rip_write(vcpu, vmcs12->guest_rip);
return 0;
}
@@ -2573,11 +2580,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * Checks related to Host Control Registers and MSRs
- */
-static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
+ nested_check_vm_exit_controls(vcpu, vmcs12) ||
+ nested_check_vm_entry_controls(vcpu, vmcs12))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
{
bool ia32e;
@@ -2585,6 +2600,15 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
!nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
!nested_cr3_valid(vcpu, vmcs12->host_cr3))
return -EINVAL;
+
+ if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) ||
+ is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
+ return -EINVAL;
+
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
+ !kvm_pat_valid(vmcs12->host_ia32_pat))
+ return -EINVAL;
+
/*
* If the load IA32_EFER VM-exit control is 1, bits reserved in the
* IA32_EFER MSR must be 0 in the field for that register. In addition,
@@ -2603,41 +2627,12 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * Checks related to Guest Non-register State
- */
-static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
-{
- if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
- vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
- return -EINVAL;
-
- return 0;
-}
-
-static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
-{
- if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
- nested_check_vm_exit_controls(vcpu, vmcs12) ||
- nested_check_vm_entry_controls(vcpu, vmcs12))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
-
- if (nested_check_host_control_regs(vcpu, vmcs12))
- return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
-
- if (nested_check_guest_non_reg_state(vmcs12))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
-
- return 0;
-}
-
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- int r;
- struct page *page;
+ int r = 0;
struct vmcs12 *shadow;
+ struct kvm_host_map map;
if (vmcs12->vmcs_link_pointer == -1ull)
return 0;
@@ -2645,23 +2640,34 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
return -EINVAL;
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
- if (is_error_page(page))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
return -EINVAL;
- r = 0;
- shadow = kmap(page);
+ shadow = map.hva;
+
if (shadow->hdr.revision_id != VMCS12_REVISION ||
shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
r = -EINVAL;
- kunmap(page);
- kvm_release_page_clean(page);
+
+ kvm_vcpu_unmap(vcpu, &map, false);
return r;
}
-static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12,
- u32 *exit_qual)
+/*
+ * Checks related to Guest Non-register State
+ */
+static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
+{
+ if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
+ vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12,
+ u32 *exit_qual)
{
bool ia32e;
@@ -2669,11 +2675,15 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
- return 1;
+ return -EINVAL;
+
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
+ !kvm_pat_valid(vmcs12->guest_ia32_pat))
+ return -EINVAL;
if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
*exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
- return 1;
+ return -EINVAL;
}
/*
@@ -2692,13 +2702,16 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
((vmcs12->guest_cr0 & X86_CR0_PG) &&
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
- return 1;
+ return -EINVAL;
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
- (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
- (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
- return 1;
+ (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+ (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+ return -EINVAL;
+
+ if (nested_check_guest_non_reg_state(vmcs12))
+ return -EINVAL;
return 0;
}
@@ -2811,6 +2824,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_host_map *map;
struct page *page;
u64 hpa;
@@ -2843,53 +2857,43 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
- if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
+ map = &vmx->nested.virtual_apic_map;
/*
* If translation failed, VM entry will fail because
* prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
- * Failing the vm entry is _not_ what the processor
- * does but it's basically the only possibility we
- * have. We could still enter the guest if CR8 load
- * exits are enabled, CR8 store exits are enabled, and
- * virtualize APIC access is disabled; in this case
- * the processor would never use the TPR shadow and we
- * could simply clear the bit from the execution
- * control. But such a configuration is useless, so
- * let's keep the code simple.
*/
- if (!is_error_page(page)) {
- vmx->nested.virtual_apic_page = page;
- hpa = page_to_phys(vmx->nested.virtual_apic_page);
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
+ if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
+ } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
+ nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ /*
+ * The processor will never use the TPR shadow, simply
+ * clear the bit from the execution control. Such a
+ * configuration is useless, but it happens in tests.
+ * For any other configuration, failing the vm entry is
+ * _not_ what the processor does but it's basically the
+ * only possibility we have.
+ */
+ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_TPR_SHADOW);
+ } else {
+ printk("bad virtual-APIC page address\n");
+ dump_vmcs();
}
}
if (nested_cpu_has_posted_intr(vmcs12)) {
- if (vmx->nested.pi_desc_page) { /* shouldn't happen */
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull);
+ map = &vmx->nested.pi_desc_map;
+
+ if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
+ vmx->nested.pi_desc =
+ (struct pi_desc *)(((void *)map->hva) +
+ offset_in_page(vmcs12->posted_intr_desc_addr));
+ vmcs_write64(POSTED_INTR_DESC_ADDR,
+ pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
}
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
- if (is_error_page(page))
- return;
- vmx->nested.pi_desc_page = page;
- vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc =
- (struct pi_desc *)((void *)vmx->nested.pi_desc +
- (unsigned long)(vmcs12->posted_intr_desc_addr &
- (PAGE_SIZE - 1)));
- vmcs_write64(POSTED_INTR_DESC_ADDR,
- page_to_phys(vmx->nested.pi_desc_page) +
- (unsigned long)(vmcs12->posted_intr_desc_addr &
- (PAGE_SIZE - 1)));
}
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
@@ -2972,7 +2976,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
return -1;
}
- if (nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+ if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
goto vmentry_fail_vmexit;
}
@@ -3117,9 +3121,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
- ret = nested_vmx_check_vmentry_prereqs(vcpu, vmcs12);
- if (ret)
- return nested_vmx_failValid(vcpu, ret);
+ if (nested_vmx_check_controls(vcpu, vmcs12))
+ return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+ if (nested_vmx_check_host_state(vcpu, vmcs12))
+ return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
/*
* We're finally done with prerequisite checking, and can start with
@@ -3282,11 +3288,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
if (max_irr != 256) {
- vapic_page = kmap(vmx->nested.virtual_apic_page);
+ vapic_page = vmx->nested.virtual_apic_map.hva;
+ if (!vapic_page)
+ return;
+
__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
vapic_page, &max_irr);
- kunmap(vmx->nested.virtual_apic_page);
-
status = vmcs_read16(GUEST_INTR_STATUS);
if ((u8)max_irr > ((u8)status & 0xff)) {
status &= ~0xff;
@@ -3397,8 +3404,8 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
- vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
- vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
+ vmcs12->guest_rsp = kvm_rsp_read(vcpu);
+ vmcs12->guest_rip = kvm_rip_read(vcpu);
vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
@@ -3581,8 +3588,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
vmx_set_efer(vcpu, vcpu->arch.efer);
- kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
- kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+ kvm_rsp_write(vcpu, vmcs12->host_rsp);
+ kvm_rip_write(vcpu, vmcs12->host_rip);
vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
vmx_set_interrupt_shadow(vcpu, 0);
@@ -3768,8 +3775,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+ /*
+ * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
+ * points to shadow pages! Fortunately we only get here after a WARN_ON
+ * if EPT is disabled, so a VMabort is perfectly fine.
+ */
+ if (enable_ept) {
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ } else {
+ nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
+ }
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -3917,16 +3934,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
- if (vmx->nested.virtual_apic_page) {
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- }
+ kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+ vmx->nested.pi_desc = NULL;
/*
* We are now running in L2, mmu_notifier will force to reload the
@@ -4222,7 +4232,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
gpa_t vmptr;
- struct page *page;
+ uint32_t revision;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -4268,20 +4278,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
* Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
* which replaces physical address width with 32
*/
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
- return nested_vmx_failInvalid(vcpu);
-
- page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
- if (is_error_page(page))
+ if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failInvalid(vcpu);
- if (*(u32 *)kmap(page) != VMCS12_REVISION) {
- kunmap(page);
- kvm_release_page_clean(page);
+ if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
+ revision != VMCS12_REVISION)
return nested_vmx_failInvalid(vcpu);
- }
- kunmap(page);
- kvm_release_page_clean(page);
vmx->nested.vmxon_ptr = vmptr;
ret = enter_vmx_operation(vcpu);
@@ -4339,7 +4341,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_INVALID_ADDRESS);
@@ -4347,7 +4349,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- if (vmx->nested.hv_evmcs_page) {
+ if (vmx->nested.hv_evmcs_map.hva) {
if (vmptr == vmx->nested.hv_evmcs_vmptr)
nested_release_evmcs(vcpu);
} else {
@@ -4546,7 +4548,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INVALID_ADDRESS);
@@ -4559,11 +4561,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return 1;
if (vmx->nested.current_vmptr != vmptr) {
+ struct kvm_host_map map;
struct vmcs12 *new_vmcs12;
- struct page *page;
- page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
- if (is_error_page(page)) {
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the
@@ -4573,12 +4574,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
- new_vmcs12 = kmap(page);
+
+ new_vmcs12 = map.hva;
+
if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
(new_vmcs12->hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
- kunmap(page);
- kvm_release_page_clean(page);
+ kvm_vcpu_unmap(vcpu, &map, false);
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@@ -4590,8 +4592,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
* cached.
*/
memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
- kunmap(page);
- kvm_release_page_clean(page);
+ kvm_vcpu_unmap(vcpu, &map, false);
set_current_vmptr(vmx, vmptr);
}
@@ -4766,7 +4767,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u32 index = kvm_rcx_read(vcpu);
u64 address;
bool accessed_dirty;
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
@@ -4812,7 +4813,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12;
- u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+ u32 function = kvm_rax_read(vcpu);
/*
* VMFUNC is only supported for nested guests, but we always enable the
@@ -4898,7 +4899,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12, u32 exit_reason)
{
- u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u32 msr_index = kvm_rcx_read(vcpu);
gpa_t bitmap;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
@@ -5335,9 +5336,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->format != 0)
return -EINVAL;
- if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
- nested_enable_evmcs(vcpu, NULL);
-
if (!nested_vmx_allowed(vcpu))
return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
@@ -5379,6 +5377,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->vmx.vmxon_pa == -1ull)
return 0;
+ if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
+ nested_enable_evmcs(vcpu, NULL);
+
vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
ret = enter_vmx_operation(vcpu);
if (ret)
@@ -5422,9 +5423,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return 0;
- vmx->nested.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
-
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
@@ -5442,14 +5440,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
}
- if (nested_vmx_check_vmentry_prereqs(vcpu, vmcs12) ||
- nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+ if (nested_vmx_check_controls(vcpu, vmcs12) ||
+ nested_vmx_check_host_state(vcpu, vmcs12) ||
+ nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
return -EINVAL;
vmx->nested.dirty_vmcs12 = true;
+ vmx->nested.nested_run_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- if (ret)
+ if (ret) {
+ vmx->nested.nested_run_pending = 0;
return -EINVAL;
+ }
return 0;
}
@@ -5717,6 +5721,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
+ /*
+ * Without EPT it is not possible to restore L1's CR3 and PDPTR on
+ * VMfail, because they are not available in vmcs01. Just always
+ * use hardware checks.
+ */
+ if (!enable_ept)
+ nested_early_check = 1;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5ab4a364348e..f8502c376b37 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -227,7 +227,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+ if (!(data & pmu->global_ovf_ctrl_mask)) {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
pmu->global_ovf_ctrl = data;
@@ -297,6 +297,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
+ pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
+ & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
+ MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
+ if (kvm_x86_ops->pt_supported())
+ pmu->global_ovf_ctrl_mask &=
+ ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
entry = kvm_find_cpuid_entry(vcpu, 7, 0);
if (entry &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c73375e01ab8..0db7ded18951 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1683,12 +1683,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = to_vmx(vcpu)->spec_ctrl;
break;
- case MSR_IA32_ARCH_CAPABILITIES:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
- return 1;
- msr_info->data = to_vmx(vcpu)->arch_capabilities;
- break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
break;
@@ -1698,6 +1692,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
+ case MSR_IA32_POWER_CTL:
+ msr_info->data = vmx->msr_ia32_power_ctl;
+ break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1828,6 +1825,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
+ case MSR_IA32_POWER_CTL:
+ vmx->msr_ia32_power_ctl = data;
+ break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1895,14 +1895,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
MSR_TYPE_W);
break;
- case MSR_IA32_ARCH_CAPABILITIES:
- if (!msr_info->host_initiated)
- return 1;
- vmx->arch_capabilities = data;
- break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ if (!kvm_pat_valid(data))
return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
@@ -2299,7 +2294,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
- VM_EXIT_SAVE_IA32_PAT |
VM_EXIT_LOAD_IA32_PAT |
VM_EXIT_LOAD_IA32_EFER |
VM_EXIT_CLEAR_BNDCFGS |
@@ -3630,14 +3624,13 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
!nested_cpu_has_vid(get_vmcs12(vcpu)) ||
- WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
+ WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
return false;
rvi = vmx_get_rvi();
- vapic_page = kmap(vmx->nested.virtual_apic_page);
+ vapic_page = vmx->nested.virtual_apic_map.hva;
vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
- kunmap(vmx->nested.virtual_apic_page);
return ((rvi & 0xf0) > (vppr & 0xf0));
}
@@ -4088,8 +4081,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
- vmx->arch_capabilities = kvm_get_arch_capabilities();
-
vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
@@ -4840,7 +4831,7 @@ static int handle_cpuid(struct kvm_vcpu *vcpu)
static int handle_rdmsr(struct kvm_vcpu *vcpu)
{
- u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
+ u32 ecx = kvm_rcx_read(vcpu);
struct msr_data msr_info;
msr_info.index = ecx;
@@ -4853,18 +4844,16 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
trace_kvm_msr_read(ecx, msr_info.data);
- /* FIXME: handling of bits 32:63 of rax, rdx */
- vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
- vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
+ kvm_rax_write(vcpu, msr_info.data & -1u);
+ kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u);
return kvm_skip_emulated_instruction(vcpu);
}
static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
struct msr_data msr;
- u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
- u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
- | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+ u32 ecx = kvm_rcx_read(vcpu);
+ u64 data = kvm_read_edx_eax(vcpu);
msr.data = data;
msr.index = ecx;
@@ -4935,7 +4924,7 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
static int handle_xsetbv(struct kvm_vcpu *vcpu)
{
u64 new_bv = kvm_read_edx_eax(vcpu);
- u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ u32 index = kvm_rcx_read(vcpu);
if (kvm_set_xcr(vcpu, index, new_bv) == 0)
return kvm_skip_emulated_instruction(vcpu);
@@ -5616,7 +5605,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
-static void dump_vmcs(void)
+void dump_vmcs(void)
{
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
@@ -5736,8 +5725,16 @@ static void dump_vmcs(void)
if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
pr_err("TSC Multiplier = 0x%016llx\n",
vmcs_read64(TSC_MULTIPLIER));
- if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
- pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+ if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
+ if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
+ u16 status = vmcs_read16(GUEST_INTR_STATUS);
+ pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
+ }
+ pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+ if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
+ pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
+ pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
+ }
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
@@ -6423,6 +6420,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
+ kvm_load_guest_xcr0(vcpu);
+
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)
@@ -6519,6 +6518,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
+ kvm_put_guest_xcr0(vcpu);
+
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
@@ -7012,7 +7013,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
return 0;
}
-static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired)
{
struct vcpu_vmx *vmx;
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
@@ -7033,10 +7035,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
/* Convert to host delta tsc if tsc scaling is enabled */
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
- u64_shl_div_u64(delta_tsc,
+ delta_tsc && u64_shl_div_u64(delta_tsc,
kvm_tsc_scaling_ratio_frac_bits,
- vcpu->arch.tsc_scaling_ratio,
- &delta_tsc))
+ vcpu->arch.tsc_scaling_ratio, &delta_tsc))
return -ERANGE;
/*
@@ -7049,7 +7050,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
return -ERANGE;
vmx->hv_deadline_tsc = tscl + delta_tsc;
- return delta_tsc == 0;
+ *expired = !delta_tsc;
+ return 0;
}
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
@@ -7086,9 +7088,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- gpa_t gpa;
- struct page *page = NULL;
- u64 *pml_address;
+ gpa_t gpa, dst;
if (is_guest_mode(vcpu)) {
WARN_ON_ONCE(vmx->nested.pml_full);
@@ -7108,15 +7108,13 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
}
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+ dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
- if (is_error_page(page))
+ if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
+ offset_in_page(dst), sizeof(gpa)))
return 0;
- pml_address = kmap(page);
- pml_address[vmcs12->guest_pml_index--] = gpa;
- kunmap(page);
- kvm_release_page_clean(page);
+ vmcs12->guest_pml_index--;
}
return 0;
@@ -7382,7 +7380,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
@@ -7393,9 +7391,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
}
if (vmx->nested.smm.guest_mode) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
@@ -7409,6 +7405,11 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
@@ -7711,6 +7712,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.set_nested_state = NULL,
.get_vmcs12_pages = NULL,
.nested_enable_evmcs = NULL,
+ .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
};
static void vmx_cleanup_l1d_flush(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 1554cb45b393..63d37ccce3dc 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,8 +142,11 @@ struct nested_vmx {
* pointers, so we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
- struct page *virtual_apic_page;
- struct page *pi_desc_page;
+ struct kvm_host_map virtual_apic_map;
+ struct kvm_host_map pi_desc_map;
+
+ struct kvm_host_map msr_bitmap_map;
+
struct pi_desc *pi_desc;
bool pi_pending;
u16 posted_intr_nv;
@@ -169,7 +172,7 @@ struct nested_vmx {
} smm;
gpa_t hv_evmcs_vmptr;
- struct page *hv_evmcs_page;
+ struct kvm_host_map hv_evmcs_map;
struct hv_enlightened_vmcs *hv_evmcs;
};
@@ -190,7 +193,6 @@ struct vcpu_vmx {
u64 msr_guest_kernel_gs_base;
#endif
- u64 arch_capabilities;
u64 spec_ctrl;
u32 vm_entry_controls_shadow;
@@ -258,6 +260,8 @@ struct vcpu_vmx {
unsigned long host_debugctlmsr;
+ u64 msr_ia32_power_ctl;
+
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -518,4 +522,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
}
+void dump_vmcs(void);
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 65e4559eef2f..dc621f73e96b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
@@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 1;
}
}
+EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
@@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 0;
}
}
+EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
@@ -1094,15 +1096,15 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
bool kvm_rdpmc(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(vcpu);
u64 data;
int err;
err = kvm_pmu_rdpmc(vcpu, ecx, &data);
if (err)
return err;
- kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
- kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
+ kvm_rax_write(vcpu, (u32)data);
+ kvm_rdx_write(vcpu, data >> 32);
return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);
@@ -1125,7 +1127,7 @@ static u32 msrs_to_save[] = {
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
- MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES,
+ MSR_IA32_SPEC_CTRL,
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
@@ -1158,6 +1160,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_TSC_ADJUST,
MSR_IA32_TSCDEADLINE,
+ MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
@@ -1167,6 +1170,9 @@ static u32 emulated_msrs[] = {
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
MSR_AMD64_VIRT_SPEC_CTRL,
+ MSR_IA32_POWER_CTL,
+
+ MSR_K7_HWCR,
};
static unsigned num_emulated_msrs;
@@ -1255,31 +1261,49 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return 0;
}
-bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
+static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
- if (efer & efer_reserved_bits)
- return false;
-
if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
- return false;
+ return false;
if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
- return false;
+ return false;
+
+ if (efer & (EFER_LME | EFER_LMA) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return false;
+
+ if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
+ return false;
return true;
+
+}
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+ if (efer & efer_reserved_bits)
+ return false;
+
+ return __kvm_valid_efer(vcpu, efer);
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);
-static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u64 old_efer = vcpu->arch.efer;
+ u64 efer = msr_info->data;
- if (!kvm_valid_efer(vcpu, efer))
- return 1;
+ if (efer & efer_reserved_bits)
+ return false;
- if (is_paging(vcpu)
- && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
- return 1;
+ if (!msr_info->host_initiated) {
+ if (!__kvm_valid_efer(vcpu, efer))
+ return 1;
+
+ if (is_paging(vcpu) &&
+ (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
+ return 1;
+ }
efer &= ~EFER_LMA;
efer |= vcpu->arch.efer & EFER_LMA;
@@ -2272,6 +2296,18 @@ static void kvmclock_sync_fn(struct work_struct *work)
KVMCLOCK_SYNC_PERIOD);
}
+/*
+ * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
+ */
+static bool can_set_mci_status(struct kvm_vcpu *vcpu)
+{
+ /* McStatusWrEn enabled? */
+ if (guest_cpuid_is_amd(vcpu))
+ return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
+
+ return false;
+}
+
static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2303,9 +2339,14 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10)) != ~(u64)0)
return -1;
+
+ /* MCi_STATUS */
if (!msr_info->host_initiated &&
- (offset & 0x3) == 1 && data != 0)
- return -1;
+ (offset & 0x3) == 1 && data != 0) {
+ if (!can_set_mci_status(vcpu))
+ return -1;
+ }
+
vcpu->arch.mce_banks[offset] = data;
break;
}
@@ -2443,14 +2484,22 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (msr_info->host_initiated)
vcpu->arch.microcode_version = data;
break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated)
+ return 1;
+ vcpu->arch.arch_capabilities = data;
+ break;
case MSR_EFER:
- return set_efer(vcpu, data);
+ return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
data &= ~(u64)0x8; /* ignore TLB cache disable */
- data &= ~(u64)0x40000; /* ignore Mc status write enable */
- if (data != 0) {
+
+ /* Handle McStatusWrEn */
+ if (data == BIT_ULL(18)) {
+ vcpu->arch.msr_hwcr = data;
+ } else if (data != 0) {
vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
return 1;
@@ -2724,7 +2773,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K8_SYSCFG:
case MSR_K8_TSEG_ADDR:
case MSR_K8_TSEG_MASK:
- case MSR_K7_HWCR:
case MSR_VM_HSAVE_PA:
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
@@ -2747,6 +2795,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_UCODE_REV:
msr_info->data = vcpu->arch.microcode_version;
break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+ return 1;
+ msr_info->data = vcpu->arch.arch_capabilities;
+ break;
case MSR_IA32_TSC:
msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
break;
@@ -2882,6 +2936,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_MISC_FEATURES_ENABLES:
msr_info->data = vcpu->arch.msr_misc_features_enables;
break;
+ case MSR_K7_HWCR:
+ msr_info->data = vcpu->arch.msr_hwcr;
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -3061,9 +3118,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
@@ -3081,7 +3135,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
- kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+ kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
break;
default:
break;
@@ -3516,7 +3570,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
memset(&events->reserved, 0, sizeof(events->reserved));
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu);
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
@@ -3576,12 +3630,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- u32 hflags = vcpu->arch.hflags;
- if (events->smi.smm)
- hflags |= HF_SMM_MASK;
- else
- hflags &= ~HF_SMM_MASK;
- kvm_set_hflags(vcpu, hflags);
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ if (events->smi.smm)
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ else
+ vcpu->arch.hflags &= ~HF_SMM_MASK;
+ kvm_smm_changed(vcpu);
+ }
vcpu->arch.smi_pending = events->smi.pending;
@@ -4258,7 +4313,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
- u32 kvm_nr_mmu_pages)
+ unsigned long kvm_nr_mmu_pages)
{
if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
@@ -4272,7 +4327,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
return 0;
}
-static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
return kvm->arch.n_max_mmu_pages;
}
@@ -5502,9 +5557,9 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int bytes,
struct x86_exception *exception)
{
+ struct kvm_host_map map;
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
- struct page *page;
char *kaddr;
bool exchanged;
@@ -5521,12 +5576,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
- page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
goto emul_write;
- kaddr = kmap_atomic(page);
- kaddr += offset_in_page(gpa);
+ kaddr = map.hva + offset_in_page(gpa);
+
switch (bytes) {
case 1:
exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
@@ -5543,13 +5597,12 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
default:
BUG();
}
- kunmap_atomic(kaddr);
- kvm_release_page_dirty(page);
+
+ kvm_vcpu_unmap(vcpu, &map, true);
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
- kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
kvm_page_track_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
@@ -5946,12 +5999,18 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+ emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+}
+
+static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
+{
+ return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
}
-static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
{
- return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
+ kvm_smm_changed(emul_to_vcpu(ctxt));
}
static const struct x86_emulate_ops emulate_ops = {
@@ -5994,6 +6053,7 @@ static const struct x86_emulate_ops emulate_ops = {
.get_hflags = emulator_get_hflags,
.set_hflags = emulator_set_hflags,
.pre_leave_smm = emulator_pre_leave_smm,
+ .post_leave_smm = emulator_post_leave_smm,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -6235,16 +6295,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
-{
- unsigned changed = vcpu->arch.hflags ^ emul_flags;
-
- vcpu->arch.hflags = emul_flags;
-
- if (changed & HF_SMM_MASK)
- kvm_smm_changed(vcpu);
-}
-
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
@@ -6523,14 +6573,27 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pio.count = 0;
+
+ if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
+ return 1;
+
+ return kvm_skip_emulated_instruction(vcpu);
+}
+
static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
{
- unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ unsigned long val = kvm_rax_read(vcpu);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
- /* do not return to emulator after return from userspace */
- vcpu->arch.pio.count = 0;
+
+ if (!ret) {
+ vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
+ vcpu->arch.complete_userspace_io = complete_fast_pio_out;
+ }
return ret;
}
@@ -6541,9 +6604,13 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
/* We should only ever be called with arch.pio.count equal to 1 */
BUG_ON(vcpu->arch.pio.count != 1);
+ if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
+ vcpu->arch.pio.count = 0;
+ return 1;
+ }
+
/* For size less than 4 we merge, else we zero extend */
- val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
- : 0;
+ val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
/*
* Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
@@ -6551,9 +6618,9 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
*/
emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
vcpu->arch.pio.port, &val, 1);
- kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ kvm_rax_write(vcpu, val);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
@@ -6563,15 +6630,16 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
int ret;
/* For size less than 4 we merge, else we zero extend */
- val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+ val = (size < 4) ? kvm_rax_read(vcpu) : 0;
ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
&val, 1);
if (ret) {
- kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ kvm_rax_write(vcpu, val);
return ret;
}
+ vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
vcpu->arch.complete_userspace_io = complete_fast_pio_in;
return 0;
@@ -6579,16 +6647,13 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
{
- int ret = kvm_skip_emulated_instruction(vcpu);
+ int ret;
- /*
- * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
- * KVM_EXIT_DEBUG here.
- */
if (in)
- return kvm_fast_pio_in(vcpu, size, port) && ret;
+ ret = kvm_fast_pio_in(vcpu, size, port);
else
- return kvm_fast_pio_out(vcpu, size, port) && ret;
+ ret = kvm_fast_pio_out(vcpu, size, port);
+ return ret && kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_fast_pio);
@@ -6796,10 +6861,20 @@ static unsigned long kvm_get_guest_ip(void)
return ip;
}
+static void kvm_handle_intel_pt_intr(void)
+{
+ struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
+
+ kvm_make_request(KVM_REQ_PMI, vcpu);
+ __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
+ (unsigned long *)&vcpu->arch.pmu.global_status);
+}
+
static struct perf_guest_info_callbacks kvm_guest_cbs = {
.is_in_guest = kvm_is_in_guest,
.is_user_mode = kvm_is_user_mode,
.get_guest_ip = kvm_get_guest_ip,
+ .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
static void kvm_set_mmio_spte_mask(void)
@@ -7075,11 +7150,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
- nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
- a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
- a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
- a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
- a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
+ nr = kvm_rax_read(vcpu);
+ a0 = kvm_rbx_read(vcpu);
+ a1 = kvm_rcx_read(vcpu);
+ a2 = kvm_rdx_read(vcpu);
+ a3 = kvm_rsi_read(vcpu);
trace_kvm_hypercall(nr, a0, a1, a2, a3);
@@ -7120,7 +7195,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
out:
if (!op_64_bit)
ret = (u32)ret;
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+ kvm_rax_write(vcpu, ret);
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
@@ -7413,9 +7488,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
+#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
{
-#ifdef CONFIG_X86_64
struct desc_ptr dt;
struct kvm_segment seg;
unsigned long val;
@@ -7465,10 +7540,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
-#else
- WARN_ON_ONCE(1);
-#endif
}
+#endif
static void enter_smm(struct kvm_vcpu *vcpu)
{
@@ -7479,9 +7552,11 @@ static void enter_smm(struct kvm_vcpu *vcpu)
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, buf);
else
+#endif
enter_smm_save_state_32(vcpu, buf);
/*
@@ -7539,8 +7614,10 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
kvm_x86_ops->set_efer(vcpu, 0);
+#endif
kvm_update_cpuid(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -7837,8 +7914,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- kvm_load_guest_xcr0(vcpu);
-
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
@@ -7891,8 +7966,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_put_guest_xcr0(vcpu);
-
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
kvm_after_interrupt(vcpu);
@@ -8211,23 +8284,23 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
}
- regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
- regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
- regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
- regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
- regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
- regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
- regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
- regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
+ regs->rax = kvm_rax_read(vcpu);
+ regs->rbx = kvm_rbx_read(vcpu);
+ regs->rcx = kvm_rcx_read(vcpu);
+ regs->rdx = kvm_rdx_read(vcpu);
+ regs->rsi = kvm_rsi_read(vcpu);
+ regs->rdi = kvm_rdi_read(vcpu);
+ regs->rsp = kvm_rsp_read(vcpu);
+ regs->rbp = kvm_rbp_read(vcpu);
#ifdef CONFIG_X86_64
- regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
- regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
- regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
- regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
- regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
- regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
- regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
- regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
+ regs->r8 = kvm_r8_read(vcpu);
+ regs->r9 = kvm_r9_read(vcpu);
+ regs->r10 = kvm_r10_read(vcpu);
+ regs->r11 = kvm_r11_read(vcpu);
+ regs->r12 = kvm_r12_read(vcpu);
+ regs->r13 = kvm_r13_read(vcpu);
+ regs->r14 = kvm_r14_read(vcpu);
+ regs->r15 = kvm_r15_read(vcpu);
#endif
regs->rip = kvm_rip_read(vcpu);
@@ -8247,23 +8320,23 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
- kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
- kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
- kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
- kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
- kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
- kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
- kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
- kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
+ kvm_rax_write(vcpu, regs->rax);
+ kvm_rbx_write(vcpu, regs->rbx);
+ kvm_rcx_write(vcpu, regs->rcx);
+ kvm_rdx_write(vcpu, regs->rdx);
+ kvm_rsi_write(vcpu, regs->rsi);
+ kvm_rdi_write(vcpu, regs->rdi);
+ kvm_rsp_write(vcpu, regs->rsp);
+ kvm_rbp_write(vcpu, regs->rbp);
#ifdef CONFIG_X86_64
- kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
- kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
- kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
- kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
- kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
- kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
- kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
- kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
+ kvm_r8_write(vcpu, regs->r8);
+ kvm_r9_write(vcpu, regs->r9);
+ kvm_r10_write(vcpu, regs->r10);
+ kvm_r11_write(vcpu, regs->r11);
+ kvm_r12_write(vcpu, regs->r12);
+ kvm_r13_write(vcpu, regs->r13);
+ kvm_r14_write(vcpu, regs->r14);
+ kvm_r15_write(vcpu, regs->r15);
#endif
kvm_rip_write(vcpu, regs->rip);
@@ -8733,6 +8806,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
kvm_vcpu_mtrr_init(vcpu);
vcpu_load(vcpu);
@@ -9429,13 +9503,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- int nr_mmu_pages = 0;
-
if (!kvm->arch.n_requested_mmu_pages)
- nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
-
- if (nr_mmu_pages)
- kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+ kvm_mmu_change_mmu_pages(kvm,
+ kvm_mmu_calculate_default_mmu_pages(kvm));
/*
* Dirty logging tracks sptes in 4k granularity, meaning that large
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 28406aa1136d..eda954441213 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -347,4 +347,16 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
__this_cpu_write(current_vcpu, NULL);
}
+
+static inline bool kvm_pat_valid(u64 data)
+{
+ if (data & 0xF8F8F8F8F8F8F8F8ull)
+ return false;
+ /* 0, 1, 4, 5, 6, 7 are valid values. */
+ return (data | ((data & 0x0202020202020202ull) << 1)) == data;
+}
+
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+
#endif
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index db3165714521..dc726e07d8ba 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -230,7 +230,7 @@ bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
/* Can we access it for direct reading/writing? Must be RAM: */
int valid_phys_addr_range(phys_addr_t addr, size_t count)
{
- return addr + count <= __pa(high_memory);
+ return addr + count - 1 <= __pa(high_memory - 1);
}
/* Can we access it through mmap? Must be a valid physical address: */
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 458a0e2bcc57..a25a9fd987a9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -449,7 +449,7 @@ void __init efi_free_boot_services(void)
*/
rm_size = real_mode_size_needed();
if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
- set_real_mode_mem(start, rm_size);
+ set_real_mode_mem(start);
start += rm_size;
size -= rm_size;
}
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index d10105825d57..7dce39c8c034 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -15,15 +15,6 @@ u32 *trampoline_cr4_features;
/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;
-void __init set_real_mode_mem(phys_addr_t mem, size_t size)
-{
- void *base = __va(mem);
-
- real_mode_header = (struct real_mode_header *) base;
- printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
- base, (unsigned long long)mem, size);
-}
-
void __init reserve_real_mode(void)
{
phys_addr_t mem;
@@ -42,7 +33,7 @@ void __init reserve_real_mode(void)
}
memblock_reserve(mem, size);
- set_real_mode_mem(mem, size);
+ set_real_mode_mem(mem);
}
static void __init setup_real_mode(void)
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 42b6cb3d16f7..3843198e03d4 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += kmap_types.h
generic-y += kprobes.h
+generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index f7dd895b2353..0c14018d1c26 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -187,15 +187,18 @@ struct thread_struct {
/* Clearing a0 terminates the backtrace. */
#define start_thread(regs, new_pc, new_sp) \
- memset(regs, 0, sizeof(*regs)); \
- regs->pc = new_pc; \
- regs->ps = USER_PS_VALUE; \
- regs->areg[1] = new_sp; \
- regs->areg[0] = 0; \
- regs->wmask = 1; \
- regs->depc = 0; \
- regs->windowbase = 0; \
- regs->windowstart = 1;
+ do { \
+ memset((regs), 0, sizeof(*(regs))); \
+ (regs)->pc = (new_pc); \
+ (regs)->ps = USER_PS_VALUE; \
+ (regs)->areg[1] = (new_sp); \
+ (regs)->areg[0] = 0; \
+ (regs)->wmask = 1; \
+ (regs)->depc = 0; \
+ (regs)->windowbase = 0; \
+ (regs)->windowstart = 1; \
+ (regs)->syscall = NO_SYSCALL; \
+ } while (0)
/* Forward declaration */
struct task_struct;
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index a168bf81c7f4..91dc06d58060 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -59,45 +59,24 @@ static inline void syscall_set_return_value(struct task_struct *task,
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
+ unsigned int i;
- if (n == 0)
- return;
-
- WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS);
-
- for (j = 0; j < n; ++j) {
- if (i + j < SYSCALL_MAX_ARGS)
- args[j] = regs->areg[reg[i + j]];
- else
- args[j] = 0;
- }
+ for (i = 0; i < 6; ++i)
+ args[i] = regs->areg[reg[i]];
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
- unsigned int j;
-
- if (n == 0)
- return;
-
- if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) {
- if (i < SYSCALL_MAX_ARGS)
- n = SYSCALL_MAX_ARGS - i;
- else
- return;
- }
+ unsigned int i;
- for (j = 0; j < n; ++j)
- regs->areg[reg[i + j]] = args[j];
+ for (i = 0; i < 6; ++i)
+ regs->areg[reg[i]] = args[i];
}
asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 8a7ad40be463..7417847dc438 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
generated-y += unistd_32.h
-generic-y += kvm_para.h
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index e50f5124dc6f..e54af8b7e0f8 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1860,6 +1860,8 @@ ENTRY(system_call)
l32i a7, a2, PT_SYSCALL
1:
+ s32i a7, a1, 4
+
/* syscall = sys_call_table[syscall_nr] */
movi a4, sys_call_table
@@ -1893,8 +1895,12 @@ ENTRY(system_call)
retw
1:
+ l32i a4, a1, 4
+ l32i a3, a2, PT_SYSCALL
+ s32i a4, a2, PT_SYSCALL
mov a6, a2
call4 do_syscall_trace_leave
+ s32i a3, a2, PT_SYSCALL
retw
ENDPROC(system_call)
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 174c11f13bba..b9f82510c650 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -253,10 +253,14 @@ static int return_address_cb(struct stackframe *frame, void *data)
return 1;
}
+/*
+ * level == 0 is for the return address from the caller of this function,
+ * not from this function itself.
+ */
unsigned long return_address(unsigned level)
{
struct return_addr_data r = {
- .skip = level + 1,
+ .skip = level,
};
walk_stackframe(stack_pointer(NULL), return_address_cb, &r);
return r.addr;
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index 2fb7d1172228..03678c4afc39 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -33,7 +33,7 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages)
pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE);
if (!pte)
- panic("%s: Failed to allocate %zu bytes align=%lx\n",
+ panic("%s: Failed to allocate %lu bytes align=%lx\n",
__func__, n_pages * sizeof(pte_t), PAGE_SIZE);
for (i = 0; i < n_pages; ++i)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 4c592496a16a..fac188dd78fa 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -674,7 +674,7 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
* at least two nodes.
*/
return !(varied_queue_weights || multiple_classes_busy
-#ifdef BFQ_GROUP_IOSCHED_ENABLED
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
|| bfqd->num_groups_with_pending_reqs > 0
#endif
);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 63311d1ff1ed..a11bef75483d 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1012,7 +1012,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
entity->on_st = true;
}
-#ifdef BFQ_GROUP_IOSCHED_ENABLED
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */
struct bfq_group *bfqg =
container_of(entity, struct bfq_group, entity);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4673ebe42255..a55389ba8779 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
*/
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
- blk_qc_t unused;
-
if (blk_cloned_rq_check_limits(q, rq))
return BLK_STS_IOERR;
@@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
- return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true);
+ return blk_mq_request_issue_directly(rq, true);
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6e0f2d97fc6d..d95f94892015 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
flush_rq->tag = -1;
} else {
- blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+ blk_mq_put_driver_tag(flush_rq);
flush_rq->internal_tag = -1;
}
@@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
if (q->elevator) {
WARN_ON(rq->tag < 0);
- blk_mq_put_driver_tag_hctx(hctx, rq);
+ blk_mq_put_driver_tag(rq);
}
/*
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 40905539afed..aa6bc5c02643 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async)
+ if (!hctx->dispatch_busy && !e && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
- else
- blk_mq_insert_requests(hctx, ctx, list);
+ if (list_empty(list))
+ return;
+ }
+ blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 70b210a308c4..a9354835cf51 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
}
/*
- * Check if any of the ctx's have pending work in this hardware queue
+ * Check if any of the ctx, dispatch list or elevator
+ * have pending work in this hardware queue.
*/
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
@@ -1071,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
spin_lock(&hctx->dispatch_wait_lock);
- list_del_init(&wait->entry);
+ if (!list_empty(&wait->entry)) {
+ struct sbitmap_queue *sbq;
+
+ list_del_init(&wait->entry);
+ sbq = &hctx->tags->bitmap_tags;
+ atomic_dec(&sbq->ws_active);
+ }
spin_unlock(&hctx->dispatch_wait_lock);
blk_mq_run_hw_queue(hctx, true);
@@ -1087,6 +1094,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
+ struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
struct wait_queue_head *wq;
wait_queue_entry_t *wait;
bool ret;
@@ -1109,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
if (!list_empty_careful(&wait->entry))
return false;
- wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait;
+ wq = &bt_wait_ptr(sbq, hctx)->wait;
spin_lock_irq(&wq->lock);
spin_lock(&hctx->dispatch_wait_lock);
@@ -1119,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
return false;
}
+ atomic_inc(&sbq->ws_active);
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
__add_wait_queue(wq, wait);
@@ -1139,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
* someone else gets the wakeup.
*/
list_del_init(&wait->entry);
+ atomic_dec(&sbq->ws_active);
spin_unlock(&hctx->dispatch_wait_lock);
spin_unlock_irq(&wq->lock);
@@ -1701,11 +1711,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
unsigned int depth;
list_splice_init(&plug->mq_list, &list);
- plug->rq_count = 0;
if (plug->rq_count > 2 && plug->multiple_queues)
list_sort(NULL, &list, plug_rq_cmp);
+ plug->rq_count = 0;
+
this_q = NULL;
this_hctx = NULL;
this_ctx = NULL;
@@ -1790,74 +1801,76 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
return ret;
}
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
blk_qc_t *cookie,
- bool bypass, bool last)
+ bool bypass_insert, bool last)
{
struct request_queue *q = rq->q;
bool run_queue = true;
- blk_status_t ret = BLK_STS_RESOURCE;
- int srcu_idx;
- bool force = false;
- hctx_lock(hctx, &srcu_idx);
/*
- * hctx_lock is needed before checking quiesced flag.
+ * RCU or SRCU read lock is needed before checking quiesced flag.
*
- * When queue is stopped or quiesced, ignore 'bypass', insert
- * and return BLK_STS_OK to caller, and avoid driver to try to
- * dispatch again.
+ * When queue is stopped or quiesced, ignore 'bypass_insert' from
+ * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
+ * and avoid driver to try to dispatch again.
*/
- if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) {
+ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
- bypass = false;
- goto out_unlock;
+ bypass_insert = false;
+ goto insert;
}
- if (unlikely(q->elevator && !bypass))
- goto out_unlock;
+ if (q->elevator && !bypass_insert)
+ goto insert;
if (!blk_mq_get_dispatch_budget(hctx))
- goto out_unlock;
+ goto insert;
if (!blk_mq_get_driver_tag(rq)) {
blk_mq_put_dispatch_budget(hctx);
- goto out_unlock;
+ goto insert;
}
- /*
- * Always add a request that has been through
- *.queue_rq() to the hardware dispatch list.
- */
- force = true;
- ret = __blk_mq_issue_directly(hctx, rq, cookie, last);
-out_unlock:
+ return __blk_mq_issue_directly(hctx, rq, cookie, last);
+insert:
+ if (bypass_insert)
+ return BLK_STS_RESOURCE;
+
+ blk_mq_request_bypass_insert(rq, run_queue);
+ return BLK_STS_OK;
+}
+
+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, blk_qc_t *cookie)
+{
+ blk_status_t ret;
+ int srcu_idx;
+
+ might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
+
+ hctx_lock(hctx, &srcu_idx);
+
+ ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
+ if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
+ blk_mq_request_bypass_insert(rq, true);
+ else if (ret != BLK_STS_OK)
+ blk_mq_end_request(rq, ret);
+
+ hctx_unlock(hctx, srcu_idx);
+}
+
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
+{
+ blk_status_t ret;
+ int srcu_idx;
+ blk_qc_t unused_cookie;
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ hctx_lock(hctx, &srcu_idx);
+ ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
hctx_unlock(hctx, srcu_idx);
- switch (ret) {
- case BLK_STS_OK:
- break;
- case BLK_STS_DEV_RESOURCE:
- case BLK_STS_RESOURCE:
- if (force) {
- blk_mq_request_bypass_insert(rq, run_queue);
- /*
- * We have to return BLK_STS_OK for the DM
- * to avoid livelock. Otherwise, we return
- * the real result to indicate whether the
- * request is direct-issued successfully.
- */
- ret = bypass ? BLK_STS_OK : ret;
- } else if (!bypass) {
- blk_mq_sched_insert_request(rq, false,
- run_queue, false);
- }
- break;
- default:
- if (!bypass)
- blk_mq_end_request(rq, ret);
- break;
- }
return ret;
}
@@ -1865,20 +1878,22 @@ out_unlock:
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list)
{
- blk_qc_t unused;
- blk_status_t ret = BLK_STS_OK;
-
while (!list_empty(list)) {
+ blk_status_t ret;
struct request *rq = list_first_entry(list, struct request,
queuelist);
list_del_init(&rq->queuelist);
- if (ret == BLK_STS_OK)
- ret = blk_mq_try_issue_directly(hctx, rq, &unused,
- false,
+ ret = blk_mq_request_issue_directly(rq, list_empty(list));
+ if (ret != BLK_STS_OK) {
+ if (ret == BLK_STS_RESOURCE ||
+ ret == BLK_STS_DEV_RESOURCE) {
+ blk_mq_request_bypass_insert(rq,
list_empty(list));
- else
- blk_mq_sched_insert_request(rq, false, true, false);
+ break;
+ }
+ blk_mq_end_request(rq, ret);
+ }
}
/*
@@ -1886,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
* the driver there was more coming, but that turned out to
* be a lie.
*/
- if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs)
+ if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
hctx->queue->mq_ops->commit_rqs(hctx);
}
@@ -1993,19 +2008,21 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
plug->rq_count--;
}
blk_add_rq_to_plug(plug, rq);
+ trace_block_plug(q);
blk_mq_put_ctx(data.ctx);
if (same_queue_rq) {
data.hctx = same_queue_rq->mq_hctx;
+ trace_block_unplug(q, 1, true);
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
- &cookie, false, true);
+ &cookie);
}
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
!data.hctx->dispatch_busy)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true);
+ blk_mq_try_issue_directly(data.hctx, rq, &cookie);
} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
@@ -2322,7 +2339,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
return 0;
free_fq:
- kfree(hctx->fq);
+ blk_free_flush_queue(hctx->fq);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0ed8e5a8729f..423ea88ab6fb 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
- struct request *rq,
- blk_qc_t *cookie,
- bool bypass, bool last);
+/* Used by blk_insert_cloned_request() to issue request directly */
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
@@ -224,15 +222,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
}
}
-static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- if (rq->tag == -1 || rq->internal_tag == -1)
- return;
-
- __blk_mq_put_driver_tag(hctx, rq);
-}
-
static inline void blk_mq_put_driver_tag(struct request *rq)
{
if (rq->tag == -1 || rq->internal_tag == -1)
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 62d3aa74277b..5e9d7348c16f 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -81,8 +81,12 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info)
ACPI_FUNCTION_TRACE(ev_enable_gpe);
- /* Enable the requested GPE */
+ /* Clear the GPE status */
+ status = acpi_hw_clear_gpe(gpe_event_info);
+ if (ACPI_FAILURE(status))
+ return_ACPI_STATUS(status);
+ /* Enable the requested GPE */
status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE);
return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 6ecbbabf1233..eec263c9019e 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1043,9 +1043,6 @@ void __init acpi_early_init(void)
acpi_permanent_mmap = true;
- /* Initialize debug output. Linux does not use ACPICA defaults */
- acpi_dbg_level = ACPI_LV_INFO | ACPI_LV_REPAIR;
-
#ifdef CONFIG_X86
/*
* If the machine falls into the DMI check table,
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 1b207fca1420..d4244e7d0e38 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1150,8 +1150,13 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
cpc_read(cpunum, nominal_reg, &nom);
perf_caps->nominal_perf = nom;
- cpc_read(cpunum, guaranteed_reg, &guaranteed);
- perf_caps->guaranteed_perf = guaranteed;
+ if (guaranteed_reg->type != ACPI_TYPE_BUFFER ||
+ IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) {
+ perf_caps->guaranteed_perf = 0;
+ } else {
+ cpc_read(cpunum, guaranteed_reg, &guaranteed);
+ perf_caps->guaranteed_perf = guaranteed;
+ }
cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear);
perf_caps->lowest_nonlinear_perf = min_nonlinear;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 8685882da64c..4b9c7ca492e6 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2057,7 +2057,8 @@ static size_t binder_get_object(struct binder_proc *proc,
size_t object_size = 0;
read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
- if (read_size < sizeof(*hdr) || !IS_ALIGNED(offset, sizeof(u32)))
+ if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
+ !IS_ALIGNED(offset, sizeof(u32)))
return 0;
binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
offset, read_size);
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 6389467670a0..195f120c4e8c 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -927,14 +927,13 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
index = page - alloc->pages;
page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
+
+ mm = alloc->vma_vm_mm;
+ if (!mmget_not_zero(mm))
+ goto err_mmget;
+ if (!down_write_trylock(&mm->mmap_sem))
+ goto err_down_write_mmap_sem_failed;
vma = binder_alloc_get_vma(alloc);
- if (vma) {
- if (!mmget_not_zero(alloc->vma_vm_mm))
- goto err_mmget;
- mm = alloc->vma_vm_mm;
- if (!down_read_trylock(&mm->mmap_sem))
- goto err_down_write_mmap_sem_failed;
- }
list_lru_isolate(lru, item);
spin_unlock(lock);
@@ -945,10 +944,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
zap_page_range(vma, page_addr, PAGE_SIZE);
trace_binder_unmap_user_end(alloc, index);
-
- up_read(&mm->mmap_sem);
- mmput(mm);
}
+ up_write(&mm->mmap_sem);
+ mmput(mm);
trace_binder_unmap_kernel_start(alloc, index);
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index b3ed8f9953a8..173e6f2dd9af 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev)
/* Per the spec, only slot type and drawer type ODD can be supported */
static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
{
- char buf[16];
+ char *buf;
unsigned int ret;
- struct rm_feature_desc *desc = (void *)(buf + 8);
+ struct rm_feature_desc *desc;
struct ata_taskfile tf;
static const char cdb[] = { GPCMD_GET_CONFIGURATION,
2, /* only 1 feature descriptor requested */
0, 3, /* 3, removable medium feature */
0, 0, 0,/* reserved */
- 0, sizeof(buf),
+ 0, 16,
0, 0, 0,
};
+ buf = kzalloc(16, GFP_KERNEL);
+ if (!buf)
+ return ODD_MECH_TYPE_UNSUPPORTED;
+ desc = (void *)(buf + 8);
+
ata_tf_init(dev, &tf);
tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
tf.command = ATA_CMD_PACKET;
tf.protocol = ATAPI_PROT_PIO;
- tf.lbam = sizeof(buf);
+ tf.lbam = 16;
ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
- buf, sizeof(buf), 0);
- if (ret)
+ buf, 16, 0);
+ if (ret) {
+ kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED;
+ }
- if (be16_to_cpu(desc->feature_code) != 3)
+ if (be16_to_cpu(desc->feature_code) != 3) {
+ kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED;
+ }
- if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1)
+ if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) {
+ kfree(buf);
return ODD_MECH_TYPE_SLOT;
- else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1)
+ } else if (desc->mech_type == 1 && desc->load == 0 &&
+ desc->eject == 1) {
+ kfree(buf);
return ODD_MECH_TYPE_DRAWER;
- else
+ } else {
+ kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED;
+ }
}
/* Test if ODD is zero power ready by sense code */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 417a9f15c116..d7ac09c092f2 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1748,6 +1748,11 @@ static int __init null_init(void)
return -EINVAL;
}
+ if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
+ pr_err("null_blk: invalid home_node value\n");
+ g_home_node = NUMA_NO_NODE;
+ }
+
if (g_queue_mode == NULL_Q_RQ) {
pr_err("null_blk: legacy IO path no longer available\n");
return -EINVAL;
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 377a694dc228..6d415b20fb70 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -314,6 +314,7 @@ static void pcd_init_units(void)
disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
1, BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disk->queue)) {
+ put_disk(disk);
disk->queue = NULL;
continue;
}
@@ -750,6 +751,8 @@ static int pcd_detect(void)
printk("%s: No CD-ROM drive found\n", name);
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
blk_cleanup_queue(cd->disk->queue);
cd->disk->queue = NULL;
blk_mq_free_tag_set(&cd->tag_set);
@@ -1010,8 +1013,14 @@ static int __init pcd_init(void)
pcd_probe_capabilities();
if (register_blkdev(major, name)) {
- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
+
+ blk_cleanup_queue(cd->disk->queue);
+ blk_mq_free_tag_set(&cd->tag_set);
put_disk(cd->disk);
+ }
return -EBUSY;
}
@@ -1032,6 +1041,9 @@ static void __exit pcd_exit(void)
int unit;
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->disk)
+ continue;
+
if (cd->present) {
del_gendisk(cd->disk);
pi_release(cd->pi);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 103b617cdc31..35e6e271b219 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -762,6 +762,8 @@ static int pf_detect(void)
printk("%s: No ATAPI disk detected\n", name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
blk_cleanup_queue(pf->disk->queue);
pf->disk->queue = NULL;
blk_mq_free_tag_set(&pf->tag_set);
@@ -1029,8 +1031,13 @@ static int __init pf_init(void)
pf_busy = 0;
if (register_blkdev(major, name)) {
- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
+ blk_cleanup_queue(pf->disk->queue);
+ blk_mq_free_tag_set(&pf->tag_set);
put_disk(pf->disk);
+ }
return -EBUSY;
}
@@ -1051,6 +1058,9 @@ static void __exit pf_exit(void)
int unit;
unregister_blkdev(major, name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->disk)
+ continue;
+
if (pf->present)
del_gendisk(pf->disk);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 87ccef4bd69e..32a21b8d1d85 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -1090,6 +1090,8 @@ static int ace_setup(struct ace_device *ace)
return 0;
err_read:
+ /* prevent double queue cleanup */
+ ace->gd->queue = NULL;
put_disk(ace->gd);
err_alloc_disk:
blk_cleanup_queue(ace->queue);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e7a5f1d1c314..399cad7daae7 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -290,18 +290,8 @@ static ssize_t idle_store(struct device *dev,
struct zram *zram = dev_to_zram(dev);
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;
- char mode_buf[8];
- ssize_t sz;
- sz = strscpy(mode_buf, buf, sizeof(mode_buf));
- if (sz <= 0)
- return -EINVAL;
-
- /* ignore trailing new line */
- if (mode_buf[sz - 1] == '\n')
- mode_buf[sz - 1] = 0x00;
-
- if (strcmp(mode_buf, "all"))
+ if (!sysfs_streq(buf, "all"))
return -EINVAL;
down_read(&zram->init_lock);
@@ -635,25 +625,15 @@ static ssize_t writeback_store(struct device *dev,
struct bio bio;
struct bio_vec bio_vec;
struct page *page;
- ssize_t ret, sz;
- char mode_buf[8];
- int mode = -1;
+ ssize_t ret;
+ int mode;
unsigned long blk_idx = 0;
- sz = strscpy(mode_buf, buf, sizeof(mode_buf));
- if (sz <= 0)
- return -EINVAL;
-
- /* ignore trailing newline */
- if (mode_buf[sz - 1] == '\n')
- mode_buf[sz - 1] = 0x00;
-
- if (!strcmp(mode_buf, "idle"))
+ if (sysfs_streq(buf, "idle"))
mode = IDLE_WRITEBACK;
- else if (!strcmp(mode_buf, "huge"))
+ else if (sysfs_streq(buf, "huge"))
mode = HUGE_WRITEBACK;
-
- if (mode == -1)
+ else
return -EINVAL;
down_read(&zram->init_lock);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index ded198328f21..7db48ae65cd2 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2942,6 +2942,7 @@ static int btusb_config_oob_wake(struct hci_dev *hdev)
return 0;
}
+ irq_set_status_flags(irq, IRQ_NOAUTOEN);
ret = devm_request_irq(&hdev->dev, irq, btusb_oob_wake_handler,
0, "OOB Wake-on-BT", data);
if (ret) {
@@ -2956,7 +2957,6 @@ static int btusb_config_oob_wake(struct hci_dev *hdev)
}
data->oob_wake_irq = irq;
- disable_irq(irq);
bt_dev_info(hdev, "OOB Wake-on-BT configured at IRQ %u", irq);
return 0;
}
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 72866a004f07..466ebd84ad17 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -348,7 +348,7 @@ config XILINX_HWICAP
config R3964
tristate "Siemens R3964 line discipline"
- depends on TTY
+ depends on TTY && BROKEN
---help---
This driver allows synchronous communication with devices using the
Siemens R3964 packet protocol. Unless you are dealing with special
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index d8b77133a83a..f824563fc28d 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -37,8 +37,8 @@
*
* Returns size of the event. If it is an invalid event, returns 0.
*/
-static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
- struct tcg_pcr_event *event_header)
+static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ struct tcg_pcr_event *event_header)
{
struct tcg_efi_specid_event_head *efispecid;
struct tcg_event_field *event_field;
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index 8856cce5a23b..817ae09a369e 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -233,12 +233,19 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
poll_wait(file, &priv->async_wait, wait);
+ mutex_lock(&priv->buffer_mutex);
- if (!priv->response_read || priv->response_length)
+ /*
+ * The response_length indicates if there is still response
+ * (or part of it) to be consumed. Partial reads decrease it
+ * by the number of bytes read, and write resets it the zero.
+ */
+ if (priv->response_length)
mask = EPOLLIN | EPOLLRDNORM;
else
mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_unlock(&priv->buffer_mutex);
return mask;
}
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 83ece5639f86..ae1030c9b086 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -402,15 +402,13 @@ int tpm_pm_suspend(struct device *dev)
if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
return 0;
- if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- mutex_lock(&chip->tpm_mutex);
- if (!tpm_chip_start(chip)) {
+ if (!tpm_chip_start(chip)) {
+ if (chip->flags & TPM_CHIP_FLAG_TPM2)
tpm2_shutdown(chip, TPM2_SU_STATE);
- tpm_chip_stop(chip);
- }
- mutex_unlock(&chip->tpm_mutex);
- } else {
- rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+ else
+ rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
+
+ tpm_chip_stop(chip);
}
return rc;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index e22f0dbaebb1..2986119dd31f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -385,7 +385,10 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
if (ret)
return ret;
- return cppc_perf.guaranteed_perf;
+ if (cppc_perf.guaranteed_perf)
+ return cppc_perf.guaranteed_perf;
+
+ return cppc_perf.nominal_perf;
}
#else /* CONFIG_ACPI_CPPC_LIB */
@@ -2593,6 +2596,9 @@ static int __init intel_pstate_init(void)
const struct x86_cpu_id *id;
int rc;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -ENODEV;
+
if (no_load)
return -ENODEV;
@@ -2608,7 +2614,7 @@ static int __init intel_pstate_init(void)
} else {
id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id) {
- pr_info("CPU ID not supported\n");
+ pr_info("CPU model not supported\n");
return -ENODEV;
}
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index 3f49427766b8..2b51e0718c9f 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -189,8 +189,8 @@ static int scpi_cpufreq_exit(struct cpufreq_policy *policy)
clk_put(priv->clk);
dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
- kfree(priv);
dev_pm_opp_remove_all_dynamic(priv->cpu_dev);
+ kfree(priv);
return 0;
}
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index b1eadc6652b5..7205d9f4029e 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -865,19 +865,18 @@ static int ahash_update_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
- if (mapped_nents) {
+ if (mapped_nents)
sg_to_sec4_sg_last(req->src, mapped_nents,
edesc->sec4_sg + sec4_sg_src_index,
0);
- if (*next_buflen)
- scatterwalk_map_and_copy(next_buf, req->src,
- to_hash - *buflen,
- *next_buflen, 0);
- } else {
+ else
sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
1);
- }
+ if (*next_buflen)
+ scatterwalk_map_and_copy(next_buf, req->src,
+ to_hash - *buflen,
+ *next_buflen, 0);
desc = edesc->hw_desc;
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index 4e0eede599a8..ac0301b69593 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -1578,11 +1578,9 @@ static int stm32_mdma_probe(struct platform_device *pdev)
dmadev->nr_channels = nr_channels;
dmadev->nr_requests = nr_requests;
- ret = device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
+ device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
dmadev->ahb_addr_masks,
count);
- if (ret)
- return ret;
dmadev->nr_ahb_addr_masks = count;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c
index 91b90c0cea73..12acdac85820 100644
--- a/drivers/gpio/gpio-adnp.c
+++ b/drivers/gpio/gpio-adnp.c
@@ -132,8 +132,10 @@ static int adnp_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
if (err < 0)
goto out;
- if (err & BIT(pos))
- err = -EACCES;
+ if (value & BIT(pos)) {
+ err = -EPERM;
+ goto out;
+ }
err = 0;
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 854bce4fb9e7..217507002dbc 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -1224,6 +1224,8 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
gpio->offset_timer =
devm_kzalloc(&pdev->dev, gpio->chip.ngpio, GFP_KERNEL);
+ if (!gpio->offset_timer)
+ return -ENOMEM;
return aspeed_gpio_setup_irqs(gpio, pdev);
}
diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c
index 0ecd2369c2ca..a09d2f9ebacc 100644
--- a/drivers/gpio/gpio-exar.c
+++ b/drivers/gpio/gpio-exar.c
@@ -148,6 +148,8 @@ static int gpio_exar_probe(struct platform_device *pdev)
mutex_init(&exar_gpio->lock);
index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL);
+ if (index < 0)
+ goto err_destroy;
sprintf(exar_gpio->name, "exar_gpio%d", index);
exar_gpio->gpio_chip.label = exar_gpio->name;
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 154d959e8993..b6a4efce7c92 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -204,8 +204,8 @@ static ssize_t gpio_mockup_debugfs_read(struct file *file,
struct gpio_mockup_chip *chip;
struct seq_file *sfile;
struct gpio_chip *gc;
+ int val, cnt;
char buf[3];
- int val, rv;
if (*ppos != 0)
return 0;
@@ -216,13 +216,9 @@ static ssize_t gpio_mockup_debugfs_read(struct file *file,
gc = &chip->gc;
val = gpio_mockup_get(gc, priv->offset);
- snprintf(buf, sizeof(buf), "%d\n", val);
+ cnt = snprintf(buf, sizeof(buf), "%d\n", val);
- rv = copy_to_user(usr_buf, buf, sizeof(buf));
- if (rv)
- return rv;
-
- return sizeof(buf) - 1;
+ return simple_read_from_buffer(usr_buf, size, ppos, buf, cnt);
}
static ssize_t gpio_mockup_debugfs_write(struct file *file,
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 8b9c3ab70f6e..6a3ec575a404 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -120,7 +120,8 @@ static void of_gpio_flags_quirks(struct device_node *np,
* to determine if the flags should have inverted semantics.
*/
if (IS_ENABLED(CONFIG_SPI_MASTER) &&
- of_property_read_bool(np, "cs-gpios")) {
+ of_property_read_bool(np, "cs-gpios") &&
+ !strcmp(propname, "cs-gpios")) {
struct device_node *child;
u32 cs;
int ret;
@@ -142,16 +143,16 @@ static void of_gpio_flags_quirks(struct device_node *np,
* conflict and the "spi-cs-high" flag will
* take precedence.
*/
- if (of_property_read_bool(np, "spi-cs-high")) {
+ if (of_property_read_bool(child, "spi-cs-high")) {
if (*flags & OF_GPIO_ACTIVE_LOW) {
pr_warn("%s GPIO handle specifies active low - ignored\n",
- of_node_full_name(np));
+ of_node_full_name(child));
*flags &= ~OF_GPIO_ACTIVE_LOW;
}
} else {
if (!(*flags & OF_GPIO_ACTIVE_LOW))
pr_info("%s enforce active low on chipselect handle\n",
- of_node_full_name(np));
+ of_node_full_name(child));
*flags |= OF_GPIO_ACTIVE_LOW;
}
break;
@@ -717,7 +718,13 @@ int of_gpiochip_add(struct gpio_chip *chip)
of_node_get(chip->of_node);
- return of_gpiochip_scan_gpios(chip);
+ status = of_gpiochip_scan_gpios(chip);
+ if (status) {
+ of_node_put(chip->of_node);
+ gpiochip_remove_pin_ranges(chip);
+ }
+
+ return status;
}
void of_gpiochip_remove(struct gpio_chip *chip)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 144af0733581..0495bf1d480a 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2776,7 +2776,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
}
config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce);
- return gpio_set_config(chip, gpio_chip_hwgpio(desc), config);
+ return chip->set_config(chip, gpio_chip_hwgpio(desc), config);
}
EXPORT_SYMBOL_GPL(gpiod_set_debounce);
@@ -2813,7 +2813,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE,
!transitory);
gpio = gpio_chip_hwgpio(desc);
- rc = gpio_set_config(chip, gpio, packed);
+ rc = chip->set_config(chip, gpio, packed);
if (rc == -ENOTSUPP) {
dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n",
gpio);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4f8fb4ecde34..ac0d646a7b74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3625,6 +3625,7 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
struct pci_dev *pdev = adev->pdev;
enum pci_bus_speed cur_speed;
enum pcie_link_width cur_width;
+ u32 ret = 1;
*speed = PCI_SPEED_UNKNOWN;
*width = PCIE_LNK_WIDTH_UNKNOWN;
@@ -3632,6 +3633,10 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
while (pdev) {
cur_speed = pcie_get_speed_cap(pdev);
cur_width = pcie_get_width_cap(pdev);
+ ret = pcie_bandwidth_available(adev->pdev, NULL,
+ NULL, &cur_width);
+ if (!ret)
+ cur_width = PCIE_LNK_WIDTH_RESRV;
if (cur_speed != PCI_SPEED_UNKNOWN) {
if (*speed == PCI_SPEED_UNKNOWN)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d0309e8c9d12..a11db2b1a63f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2405,8 +2405,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
/* disable CG */
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
- adev->gfx.rlc.funcs->reset(adev);
-
gfx_v9_0_init_pg(adev);
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index fb27783d7a54..81127f7d6ed1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5429,9 +5429,11 @@ static void get_freesync_config_for_crtc(
struct amdgpu_dm_connector *aconnector =
to_amdgpu_dm_connector(new_con_state->base.connector);
struct drm_display_mode *mode = &new_crtc_state->base.mode;
+ int vrefresh = drm_mode_vrefresh(mode);
new_crtc_state->vrr_supported = new_con_state->freesync_capable &&
- aconnector->min_vfreq <= drm_mode_vrefresh(mode);
+ vrefresh >= aconnector->min_vfreq &&
+ vrefresh <= aconnector->max_vfreq;
if (new_crtc_state->vrr_supported) {
new_crtc_state->stream->ignore_msa_timing_param = true;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 4eba3c4800b6..ea18e9c2d8ce 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -2660,12 +2660,18 @@ void core_link_enable_stream(
void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
{
struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+ struct dc_stream_state *stream = pipe_ctx->stream;
core_dc->hwss.blank_stream(pipe_ctx);
if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
deallocate_mst_payload(pipe_ctx);
+ if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
+ dal_ddc_service_write_scdc_data(
+ stream->link->ddc, 0,
+ stream->timing.flags.LTE_340MCSC_SCRAMBLE);
+
core_dc->hwss.disable_stream(pipe_ctx, option);
disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 9aa7bec1b5fe..23b5b94a4939 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -91,6 +91,12 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
* MP0CLK DS
*/
data->registry_data.disallowed_features = 0xE0041C00;
+ /* ECC feature should be disabled on old SMUs */
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion);
+ hwmgr->smu_version = smum_get_argument(hwmgr);
+ if (hwmgr->smu_version < 0x282100)
+ data->registry_data.disallowed_features |= FEATURE_ECC_MASK;
+
data->registry_data.od_state_in_dc_support = 0;
data->registry_data.thermal_support = 1;
data->registry_data.skip_baco_hardware = 0;
@@ -357,6 +363,7 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
data->smu_features[GNLD_DS_MP1CLK].smu_feature_id = FEATURE_DS_MP1CLK_BIT;
data->smu_features[GNLD_DS_MP0CLK].smu_feature_id = FEATURE_DS_MP0CLK_BIT;
data->smu_features[GNLD_XGMI].smu_feature_id = FEATURE_XGMI_BIT;
+ data->smu_features[GNLD_ECC].smu_feature_id = FEATURE_ECC_BIT;
for (i = 0; i < GNLD_FEATURES_MAX; i++) {
data->smu_features[i].smu_feature_bitmap =
@@ -3020,7 +3027,8 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf)
"FCLK_DS",
"MP1CLK_DS",
"MP0CLK_DS",
- "XGMI"};
+ "XGMI",
+ "ECC"};
static const char *output_title[] = {
"FEATURES",
"BITMASK",
@@ -3462,6 +3470,7 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
struct vega20_single_dpm_table *dpm_table;
bool vblank_too_short = false;
bool disable_mclk_switching;
+ bool disable_fclk_switching;
uint32_t i, latency;
disable_mclk_switching = ((1 < hwmgr->display_config->num_display) &&
@@ -3537,13 +3546,20 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
if (hwmgr->display_config->nb_pstate_switch_disable)
dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
+ if ((disable_mclk_switching &&
+ (dpm_table->dpm_state.hard_min_level == dpm_table->dpm_levels[dpm_table->count - 1].value)) ||
+ hwmgr->display_config->min_mem_set_clock / 100 >= dpm_table->dpm_levels[dpm_table->count - 1].value)
+ disable_fclk_switching = true;
+ else
+ disable_fclk_switching = false;
+
/* fclk */
dpm_table = &(data->dpm_table.fclk_table);
dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value;
dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT;
dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value;
dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT;
- if (hwmgr->display_config->nb_pstate_switch_disable)
+ if (hwmgr->display_config->nb_pstate_switch_disable || disable_fclk_switching)
dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value;
/* vclk */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
index a5bc758ae097..ac2a3118a0ae 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
@@ -80,6 +80,7 @@ enum {
GNLD_DS_MP1CLK,
GNLD_DS_MP0CLK,
GNLD_XGMI,
+ GNLD_ECC,
GNLD_FEATURES_MAX
};
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
index 63d5cf691549..195c4ae67058 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
@@ -99,7 +99,7 @@
#define FEATURE_DS_MP1CLK_BIT 30
#define FEATURE_DS_MP0CLK_BIT 31
#define FEATURE_XGMI_BIT 32
-#define FEATURE_SPARE_33_BIT 33
+#define FEATURE_ECC_BIT 33
#define FEATURE_SPARE_34_BIT 34
#define FEATURE_SPARE_35_BIT 35
#define FEATURE_SPARE_36_BIT 36
@@ -165,7 +165,8 @@
#define FEATURE_DS_FCLK_MASK (1 << FEATURE_DS_FCLK_BIT )
#define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT )
#define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT )
-#define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT )
+#define FEATURE_XGMI_MASK (1ULL << FEATURE_XGMI_BIT )
+#define FEATURE_ECC_MASK (1ULL << FEATURE_ECC_BIT )
#define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x00000001
#define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000002
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 381581b01d48..05bbc2b622fc 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -376,11 +376,7 @@ void drm_dev_unplug(struct drm_device *dev)
synchronize_srcu(&drm_unplug_srcu);
drm_dev_unregister(dev);
-
- mutex_lock(&drm_global_mutex);
- if (dev->open_count == 0)
- drm_dev_put(dev);
- mutex_unlock(&drm_global_mutex);
+ drm_dev_put(dev);
}
EXPORT_SYMBOL(drm_dev_unplug);
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 0e9349ff2d16..af2ab640cadb 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1963,7 +1963,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
best_depth = fmt->depth;
}
}
- if (sizes.surface_depth != best_depth) {
+ if (sizes.surface_depth != best_depth && best_depth) {
DRM_INFO("requested bpp %d, scaled depth down to %d",
sizes.surface_bpp, best_depth);
sizes.surface_depth = best_depth;
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 83a5bbca6e7e..7caa3c7ed978 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -489,11 +489,9 @@ int drm_release(struct inode *inode, struct file *filp)
drm_close_helper(filp);
- if (!--dev->open_count) {
+ if (!--dev->open_count)
drm_lastclose(dev);
- if (drm_dev_is_unplugged(dev))
- drm_put_dev(dev);
- }
+
mutex_unlock(&drm_global_mutex);
drm_minor_release(minor);
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 35b4ec3f7618..3592d04c33b2 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1441,7 +1441,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
}
if (index_mode) {
- if (guest_gma >= I915_GTT_PAGE_SIZE / sizeof(u64)) {
+ if (guest_gma >= I915_GTT_PAGE_SIZE) {
ret = -EFAULT;
goto err;
}
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 035479e273be..e3f9caa7839f 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -448,7 +448,7 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt)
/**
* intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU
* @vgpu: a vGPU
- * @conncted: link state
+ * @connected: link state
*
* This function is used to trigger hotplug interrupt for vGPU
*
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 3e7e2b80c857..5d887f7cc0d5 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -238,9 +238,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
default:
gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled);
}
-
- info->size = (((p.stride * p.height * p.bpp) / 8) +
- (PAGE_SIZE - 1)) >> PAGE_SHIFT;
} else if (plane_id == DRM_PLANE_TYPE_CURSOR) {
ret = intel_vgpu_decode_cursor_plane(vgpu, &c);
if (ret)
@@ -262,14 +259,13 @@ static int vgpu_get_plane_info(struct drm_device *dev,
info->x_hot = UINT_MAX;
info->y_hot = UINT_MAX;
}
-
- info->size = (((info->stride * c.height * c.bpp) / 8)
- + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
} else {
gvt_vgpu_err("invalid plane id:%d\n", plane_id);
return -EINVAL;
}
+ info->size = (info->stride * info->height + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
if (info->size == 0) {
gvt_vgpu_err("fb size is zero\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c7103dd2d8d5..cf133ef03873 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1882,7 +1882,11 @@ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
}
list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
+
+ mutex_lock(&gvt->gtt.ppgtt_mm_lock);
list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
+ mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
+
return mm;
}
@@ -1942,7 +1946,7 @@ void _intel_vgpu_mm_release(struct kref *mm_ref)
*/
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
- atomic_dec(&mm->pincount);
+ atomic_dec_if_positive(&mm->pincount);
}
/**
@@ -1967,9 +1971,10 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
if (ret)
return ret;
+ mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
list_move_tail(&mm->ppgtt_mm.lru_list,
&mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
-
+ mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
}
return 0;
@@ -1980,6 +1985,8 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
struct intel_vgpu_mm *mm;
struct list_head *pos, *n;
+ mutex_lock(&gvt->gtt.ppgtt_mm_lock);
+
list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
@@ -1987,9 +1994,11 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
continue;
list_del_init(&mm->ppgtt_mm.lru_list);
+ mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
invalidate_ppgtt_mm(mm);
return 1;
}
+ mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
return 0;
}
@@ -2659,6 +2668,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
}
}
INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
+ mutex_init(&gvt->gtt.ppgtt_mm_lock);
return 0;
}
@@ -2699,7 +2709,9 @@ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
if (mm->type == INTEL_GVT_MM_PPGTT) {
+ mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
list_del_init(&mm->ppgtt_mm.lru_list);
+ mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
if (mm->ppgtt_mm.shadowed)
invalidate_ppgtt_mm(mm);
}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index d8cb04cc946d..edb610dc5d86 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -88,6 +88,7 @@ struct intel_gvt_gtt {
void (*mm_free_page_table)(struct intel_vgpu_mm *mm);
struct list_head oos_page_use_list_head;
struct list_head oos_page_free_list_head;
+ struct mutex ppgtt_mm_lock;
struct list_head ppgtt_mm_lru_list_head;
struct page *scratch_page;
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 7d84cfb9051a..7902fb162d09 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
{RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+ {RCS, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
{RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
{RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1bb8f936fdaa..05b953793316 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -346,7 +346,7 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
int i = 0;
if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed)
- return -1;
+ return -EINVAL;
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0];
@@ -410,12 +410,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
if (workload->shadow)
return 0;
- ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
- if (ret < 0) {
- gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
- return ret;
- }
-
/* pin shadow context by gvt even the shadow context will be pinned
* when i915 alloc request. That is because gvt will update the guest
* context from shadow context when workload is completed, and at that
@@ -678,6 +672,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ struct i915_gem_context *shadow_ctx = s->shadow_ctx;
+ struct i915_request *rq;
int ring_id = workload->ring_id;
int ret;
@@ -687,6 +684,12 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
mutex_lock(&vgpu->vgpu_lock);
mutex_lock(&dev_priv->drm.struct_mutex);
+ ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
+ if (ret < 0) {
+ gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+ goto err_req;
+ }
+
ret = intel_gvt_workload_req_alloc(workload);
if (ret)
goto err_req;
@@ -703,6 +706,14 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
ret = prepare_workload(workload);
out:
+ if (ret) {
+ /* We might still need to add request with
+ * clean ctx to retire it properly..
+ */
+ rq = fetch_and_zero(&workload->req);
+ i915_request_put(rq);
+ }
+
if (!IS_ERR_OR_NULL(workload->req)) {
gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
ring_id, workload->req);
@@ -739,7 +750,8 @@ static struct intel_vgpu_workload *pick_next_workload(
goto out;
}
- if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
+ if (!scheduler->current_vgpu->active ||
+ list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
goto out;
/*
@@ -1474,8 +1486,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
intel_runtime_pm_put_unchecked(dev_priv);
}
- if (ret && (vgpu_is_vm_unhealthy(ret))) {
- enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
+ if (ret) {
+ if (vgpu_is_vm_unhealthy(ret))
+ enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
intel_vgpu_destroy_workload(workload);
return ERR_PTR(ret);
}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0bd890c04fe4..f6f6e5b78e97 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4830,7 +4830,10 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data)
ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
&ctx);
if (ret) {
- ret = -EINTR;
+ if (ret == -EDEADLK && !drm_modeset_backoff(&ctx)) {
+ try_again = true;
+ continue;
+ }
break;
}
crtc = connector->state->crtc;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9adc7bb9e69c..a67a63b5aa84 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2346,7 +2346,8 @@ static inline unsigned int i915_sg_segment_size(void)
INTEL_DEVID(dev_priv) == 0x5915 || \
INTEL_DEVID(dev_priv) == 0x591E)
#define IS_AML_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x591C || \
- INTEL_DEVID(dev_priv) == 0x87C0)
+ INTEL_DEVID(dev_priv) == 0x87C0 || \
+ INTEL_DEVID(dev_priv) == 0x87CA)
#define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \
INTEL_INFO(dev_priv)->gt == 2)
#define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 638a586469f9..047855dd8c6b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2863,7 +2863,7 @@ enum i915_power_well_id {
#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140)
#define GEN11_GT_VDBOX_DISABLE_MASK 0xff
#define GEN11_GT_VEBOX_DISABLE_SHIFT 16
-#define GEN11_GT_VEBOX_DISABLE_MASK (0xff << GEN11_GT_VEBOX_DISABLE_SHIFT)
+#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
#define GEN11_EU_DISABLE _MMIO(0x9134)
#define GEN11_EU_DIS_MASK 0xFF
@@ -9243,7 +9243,7 @@ enum skl_power_gate {
#define TRANS_DDI_FUNC_CTL2(tran) _MMIO_TRANS2(tran, \
_TRANS_DDI_FUNC_CTL2_A)
#define PORT_SYNC_MODE_ENABLE (1 << 4)
-#define PORT_SYNC_MODE_MASTER_SELECT(x) ((x) < 0)
+#define PORT_SYNC_MODE_MASTER_SELECT(x) ((x) << 0)
#define PORT_SYNC_MODE_MASTER_SELECT_MASK (0x7 << 0)
#define PORT_SYNC_MODE_MASTER_SELECT_SHIFT 0
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 32dce7176f63..b9b0ea4e2404 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -455,7 +455,7 @@ static int igt_evict_contexts(void *arg)
struct i915_gem_context *ctx;
ctx = live_context(i915, file);
- if (!ctx)
+ if (IS_ERR(ctx))
break;
/* We will need some GGTT space for the rq's context */
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 2281ed3eb774..8a4ebcb6405c 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -337,12 +337,14 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
ret = drm_dev_register(drm, 0);
if (ret)
- goto free_drm;
+ goto uninstall_irq;
drm_fbdev_generic_setup(drm, 32);
return 0;
+uninstall_irq:
+ drm_irq_uninstall(drm);
free_drm:
drm_dev_put(drm);
@@ -356,8 +358,8 @@ static int meson_drv_bind(struct device *dev)
static void meson_drv_unbind(struct device *dev)
{
- struct drm_device *drm = dev_get_drvdata(dev);
- struct meson_drm *priv = drm->dev_private;
+ struct meson_drm *priv = dev_get_drvdata(dev);
+ struct drm_device *drm = priv->drm;
if (priv->canvas) {
meson_canvas_free(priv->canvas, priv->canvas_id_osd1);
@@ -367,6 +369,7 @@ static void meson_drv_unbind(struct device *dev)
}
drm_dev_unregister(drm);
+ drm_irq_uninstall(drm);
drm_kms_helper_poll_fini(drm);
drm_mode_config_cleanup(drm);
drm_dev_put(drm);
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index e28814f4ea6c..563953ec6ad0 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -569,7 +569,8 @@ dw_hdmi_mode_valid(struct drm_connector *connector,
DRM_DEBUG_DRIVER("Modeline " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode));
/* If sink max TMDS clock, we reject the mode */
- if (mode->clock > connector->display_info.max_tmds_clock)
+ if (connector->display_info.max_tmds_clock &&
+ mode->clock > connector->display_info.max_tmds_clock)
return MODE_BAD;
/* Check against non-VIC supported modes */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index c7d4c6073ea5..0d4ade9d4722 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -541,6 +541,18 @@ static void vop_core_clks_disable(struct vop *vop)
clk_disable(vop->hclk);
}
+static void vop_win_disable(struct vop *vop, const struct vop_win_data *win)
+{
+ if (win->phy->scl && win->phy->scl->ext) {
+ VOP_SCL_SET_EXT(vop, win, yrgb_hor_scl_mode, SCALE_NONE);
+ VOP_SCL_SET_EXT(vop, win, yrgb_ver_scl_mode, SCALE_NONE);
+ VOP_SCL_SET_EXT(vop, win, cbcr_hor_scl_mode, SCALE_NONE);
+ VOP_SCL_SET_EXT(vop, win, cbcr_ver_scl_mode, SCALE_NONE);
+ }
+
+ VOP_WIN_SET(vop, win, enable, 0);
+}
+
static int vop_enable(struct drm_crtc *crtc)
{
struct vop *vop = to_vop(crtc);
@@ -586,7 +598,7 @@ static int vop_enable(struct drm_crtc *crtc)
struct vop_win *vop_win = &vop->win[i];
const struct vop_win_data *win = vop_win->data;
- VOP_WIN_SET(vop, win, enable, 0);
+ vop_win_disable(vop, win);
}
spin_unlock(&vop->reg_lock);
@@ -735,7 +747,7 @@ static void vop_plane_atomic_disable(struct drm_plane *plane,
spin_lock(&vop->reg_lock);
- VOP_WIN_SET(vop, win, enable, 0);
+ vop_win_disable(vop, win);
spin_unlock(&vop->reg_lock);
}
@@ -1622,7 +1634,7 @@ static int vop_initial(struct vop *vop)
int channel = i * 2 + 1;
VOP_WIN_SET(vop, win, channel, (channel + 1) << 4 | channel);
- VOP_WIN_SET(vop, win, enable, 0);
+ vop_win_disable(vop, win);
VOP_WIN_SET(vop, win, gate, 1);
}
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index ba9b3cfb8c3d..b3436c2aed68 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -378,14 +378,16 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
- struct tegra_dc *dc = to_tegra_dc(old_state->crtc);
struct tegra_plane *p = to_tegra_plane(plane);
+ struct tegra_dc *dc;
u32 value;
/* rien ne va plus */
if (!old_state || !old_state->crtc)
return;
+ dc = to_tegra_dc(old_state->crtc);
+
/*
* XXX Legacy helpers seem to sometimes call ->atomic_disable() even
* on planes that are already disabled. Make sure we fallback to the
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 39bfed9623de..982ce37ecde1 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -106,6 +106,7 @@ static int vic_boot(struct vic *vic)
if (vic->booted)
return 0;
+#ifdef CONFIG_IOMMU_API
if (vic->config->supports_sid) {
struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev);
u32 value;
@@ -121,6 +122,7 @@ static int vic_boot(struct vic *vic)
vic_writel(vic, value, VIC_THI_STREAMID1);
}
}
+#endif
/* setup clockgating registers */
vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
index 66885c24590f..c1bd5e3d9e4a 100644
--- a/drivers/gpu/drm/udl/udl_connector.c
+++ b/drivers/gpu/drm/udl/udl_connector.c
@@ -18,18 +18,19 @@
#include "udl_connector.h"
#include "udl_drv.h"
-static bool udl_get_edid_block(struct udl_device *udl, int block_idx,
- u8 *buff)
+static int udl_get_edid_block(void *data, u8 *buf, unsigned int block,
+ size_t len)
{
int ret, i;
u8 *read_buff;
+ struct udl_device *udl = data;
read_buff = kmalloc(2, GFP_KERNEL);
if (!read_buff)
- return false;
+ return -1;
- for (i = 0; i < EDID_LENGTH; i++) {
- int bval = (i + block_idx * EDID_LENGTH) << 8;
+ for (i = 0; i < len; i++) {
+ int bval = (i + block * EDID_LENGTH) << 8;
ret = usb_control_msg(udl->udev,
usb_rcvctrlpipe(udl->udev, 0),
(0x02), (0x80 | (0x02 << 5)), bval,
@@ -37,60 +38,13 @@ static bool udl_get_edid_block(struct udl_device *udl, int block_idx,
if (ret < 1) {
DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret);
kfree(read_buff);
- return false;
+ return -1;
}
- buff[i] = read_buff[1];
+ buf[i] = read_buff[1];
}
kfree(read_buff);
- return true;
-}
-
-static bool udl_get_edid(struct udl_device *udl, u8 **result_buff,
- int *result_buff_size)
-{
- int i, extensions;
- u8 *block_buff = NULL, *buff_ptr;
-
- block_buff = kmalloc(EDID_LENGTH, GFP_KERNEL);
- if (block_buff == NULL)
- return false;
-
- if (udl_get_edid_block(udl, 0, block_buff) &&
- memchr_inv(block_buff, 0, EDID_LENGTH)) {
- extensions = ((struct edid *)block_buff)->extensions;
- if (extensions > 0) {
- /* we have to read all extensions one by one */
- *result_buff_size = EDID_LENGTH * (extensions + 1);
- *result_buff = kmalloc(*result_buff_size, GFP_KERNEL);
- buff_ptr = *result_buff;
- if (buff_ptr == NULL) {
- kfree(block_buff);
- return false;
- }
- memcpy(buff_ptr, block_buff, EDID_LENGTH);
- kfree(block_buff);
- buff_ptr += EDID_LENGTH;
- for (i = 1; i < extensions; ++i) {
- if (udl_get_edid_block(udl, i, buff_ptr)) {
- buff_ptr += EDID_LENGTH;
- } else {
- kfree(*result_buff);
- *result_buff = NULL;
- return false;
- }
- }
- return true;
- }
- /* we have only base edid block */
- *result_buff = block_buff;
- *result_buff_size = EDID_LENGTH;
- return true;
- }
-
- kfree(block_buff);
-
- return false;
+ return 0;
}
static int udl_get_modes(struct drm_connector *connector)
@@ -122,8 +76,6 @@ static enum drm_mode_status udl_mode_valid(struct drm_connector *connector,
static enum drm_connector_status
udl_detect(struct drm_connector *connector, bool force)
{
- u8 *edid_buff = NULL;
- int edid_buff_size = 0;
struct udl_device *udl = connector->dev->dev_private;
struct udl_drm_connector *udl_connector =
container_of(connector,
@@ -136,12 +88,10 @@ udl_detect(struct drm_connector *connector, bool force)
udl_connector->edid = NULL;
}
-
- if (!udl_get_edid(udl, &edid_buff, &edid_buff_size))
+ udl_connector->edid = drm_do_get_edid(connector, udl_get_edid_block, udl);
+ if (!udl_connector->edid)
return connector_status_disconnected;
- udl_connector->edid = (struct edid *)edid_buff;
-
return connector_status_connected;
}
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 5930facd6d2d..11a8f99ba18c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -191,13 +191,9 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
ret = drm_gem_handle_create(file, &obj->base, handle);
drm_gem_object_put_unlocked(&obj->base);
if (ret)
- goto err;
+ return ERR_PTR(ret);
return &obj->base;
-
-err:
- __vgem_gem_destroy(obj);
- return ERR_PTR(ret);
}
static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c
index 138b0bb325cf..69048e73377d 100644
--- a/drivers/gpu/drm/vkms/vkms_gem.c
+++ b/drivers/gpu/drm/vkms/vkms_gem.c
@@ -111,11 +111,8 @@ struct drm_gem_object *vkms_gem_create(struct drm_device *dev,
ret = drm_gem_handle_create(file, &obj->gem, handle);
drm_gem_object_put_unlocked(&obj->gem);
- if (ret) {
- drm_gem_object_release(&obj->gem);
- kfree(obj);
+ if (ret)
return ERR_PTR(ret);
- }
return &obj->gem;
}
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 6ca8d322b487..4ca0cdfa6b33 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -150,6 +150,7 @@ config HID_ASUS
tristate "Asus"
depends on LEDS_CLASS
depends on ASUS_WMI || ASUS_WMI=n
+ select POWER_SUPPLY
---help---
Support for Asus notebook built-in keyboard and touchpad via i2c, and
the Asus Republic of Gamers laptop keyboard special keys.
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 9993b692598f..860e21ec6a49 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1301,10 +1301,10 @@ static u32 __extract(u8 *report, unsigned offset, int n)
u32 hid_field_extract(const struct hid_device *hid, u8 *report,
unsigned offset, unsigned n)
{
- if (n > 32) {
- hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n",
+ if (n > 256) {
+ hid_warn(hid, "hid_field_extract() called with n (%d) > 256! (%s)\n",
n, current->comm);
- n = 32;
+ n = 256;
}
return __extract(report, offset, n);
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index ac9fda1b5a72..1384e57182af 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1060,10 +1060,15 @@ static int hid_debug_rdesc_show(struct seq_file *f, void *p)
seq_printf(f, "\n\n");
/* dump parsed data and input mappings */
+ if (down_interruptible(&hdev->driver_input_lock))
+ return 0;
+
hid_dump_device(hdev, f);
seq_printf(f, "\n");
hid_dump_input_mapping(hdev, f);
+ up(&hdev->driver_input_lock);
+
return 0;
}
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index b6d93f4ad037..adce58f24f76 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1083,6 +1083,7 @@
#define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3
#define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3
#define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710
+#define I2C_DEVICE_ID_SYNAPTICS_7E7E 0x7e7e
#define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047
#define USB_DEVICE_ID_TEXAS_INSTRUMENTS_LENOVO_YOGA 0x0855
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index b10b1922c5bd..1fce0076e7dc 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -998,6 +998,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x1b8: map_key_clear(KEY_VIDEO); break;
case 0x1bc: map_key_clear(KEY_MESSENGER); break;
case 0x1bd: map_key_clear(KEY_INFO); break;
+ case 0x1cb: map_key_clear(KEY_ASSISTANT); break;
case 0x201: map_key_clear(KEY_NEW); break;
case 0x202: map_key_clear(KEY_OPEN); break;
case 0x203: map_key_clear(KEY_CLOSE); break;
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 15ed6177a7a3..199cc256e9d9 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -2111,6 +2111,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
kfree(data);
return -ENOMEM;
}
+ data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue");
+ if (!data->wq) {
+ kfree(data->effect_ids);
+ kfree(data);
+ return -ENOMEM;
+ }
+
data->hidpp = hidpp;
data->feature_index = feature_index;
data->version = version;
@@ -2155,7 +2162,6 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
/* ignore boost value at response.fap.params[2] */
/* init the hardware command queue */
- data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue");
atomic_set(&data->workqueue_size, 0);
/* initialize with zero autocenter to get wheel in usable state */
@@ -2608,8 +2614,9 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size)
input_report_rel(mydata->input, REL_Y, v);
v = hid_snto32(data[6], 8);
- hidpp_scroll_counter_handle_scroll(
- &hidpp->vertical_wheel_counter, v);
+ if (v != 0)
+ hidpp_scroll_counter_handle_scroll(
+ &hidpp->vertical_wheel_counter, v);
input_sync(mydata->input);
}
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 953908f2267c..77ffba48cc73 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -715,7 +715,6 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
- { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) },
@@ -855,7 +854,7 @@ static const struct hid_device_id hid_ignore_list[] = {
{ }
};
-/**
+/*
* hid_mouse_ignore_list - mouse devices which should not be handled by the hid layer
*
* There are composite devices for which we want to ignore only a certain
@@ -996,6 +995,10 @@ bool hid_ignore(struct hid_device *hdev)
if (hdev->product == 0x0401 &&
strncmp(hdev->name, "ELAN0800", 8) != 0)
return true;
+ /* Same with product id 0x0400 */
+ if (hdev->product == 0x0400 &&
+ strncmp(hdev->name, "QTEC0001", 8) != 0)
+ return true;
break;
}
@@ -1042,7 +1045,7 @@ static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev)
}
if (bl_entry != NULL)
- dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%hx:0x%hx\n",
+ dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%04x:0x%04x\n",
bl_entry->driver_data, bl_entry->vendor,
bl_entry->product);
@@ -1209,7 +1212,7 @@ static unsigned long hid_gets_squirk(const struct hid_device *hdev)
quirks |= bl_entry->driver_data;
if (quirks)
- dbg_hid("Found squirk 0x%lx for HID device 0x%hx:0x%hx\n",
+ dbg_hid("Found squirk 0x%lx for HID device 0x%04x:0x%04x\n",
quirks, hdev->vendor, hdev->product);
return quirks;
}
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
index 8141cadfca0e..8dae0f9b819e 100644
--- a/drivers/hid/hid-steam.c
+++ b/drivers/hid/hid-steam.c
@@ -499,6 +499,7 @@ static void steam_battery_unregister(struct steam_device *steam)
static int steam_register(struct steam_device *steam)
{
int ret;
+ bool client_opened;
/*
* This function can be called several times in a row with the
@@ -511,9 +512,11 @@ static int steam_register(struct steam_device *steam)
* Unlikely, but getting the serial could fail, and it is not so
* important, so make up a serial number and go on.
*/
+ mutex_lock(&steam->mutex);
if (steam_get_serial(steam) < 0)
strlcpy(steam->serial_no, "XXXXXXXXXX",
sizeof(steam->serial_no));
+ mutex_unlock(&steam->mutex);
hid_info(steam->hdev, "Steam Controller '%s' connected",
steam->serial_no);
@@ -528,13 +531,15 @@ static int steam_register(struct steam_device *steam)
}
mutex_lock(&steam->mutex);
- if (!steam->client_opened) {
+ client_opened = steam->client_opened;
+ if (!client_opened)
steam_set_lizard_mode(steam, lizard_mode);
+ mutex_unlock(&steam->mutex);
+
+ if (!client_opened)
ret = steam_input_register(steam);
- } else {
+ else
ret = 0;
- }
- mutex_unlock(&steam->mutex);
return ret;
}
@@ -630,14 +635,21 @@ static void steam_client_ll_close(struct hid_device *hdev)
{
struct steam_device *steam = hdev->driver_data;
+ unsigned long flags;
+ bool connected;
+
+ spin_lock_irqsave(&steam->lock, flags);
+ connected = steam->connected;
+ spin_unlock_irqrestore(&steam->lock, flags);
+
mutex_lock(&steam->mutex);
steam->client_opened = false;
+ if (connected)
+ steam_set_lizard_mode(steam, lizard_mode);
mutex_unlock(&steam->mutex);
- if (steam->connected) {
- steam_set_lizard_mode(steam, lizard_mode);
+ if (connected)
steam_input_register(steam);
- }
}
static int steam_client_ll_raw_request(struct hid_device *hdev,
diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c
index 7710d9f957da..0187c9f8fc22 100644
--- a/drivers/hid/hid-uclogic-params.c
+++ b/drivers/hid/hid-uclogic-params.c
@@ -735,10 +735,6 @@ static int uclogic_params_huion_init(struct uclogic_params *params,
goto cleanup;
}
rc = usb_string(udev, 201, ver_ptr, ver_len);
- if (ver_ptr == NULL) {
- rc = -ENOMEM;
- goto cleanup;
- }
if (rc == -EPIPE) {
*ver_ptr = '\0';
} else if (rc < 0) {
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 90164fed08d3..4d1f24ee249c 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -184,6 +184,8 @@ static const struct i2c_hid_quirks {
I2C_HID_QUIRK_NO_RUNTIME_PM },
{ USB_VENDOR_ID_ELAN, HID_ANY_ID,
I2C_HID_QUIRK_BOGUS_IRQ },
+ { USB_VENDOR_ID_SYNAPTICS, I2C_DEVICE_ID_SYNAPTICS_7E7E,
+ I2C_HID_QUIRK_NO_RUNTIME_PM },
{ 0, 0 }
};
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 6f929bfa9fcd..d0f1dfe2bcbb 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1759,6 +1759,7 @@ config SENSORS_VT8231
config SENSORS_W83773G
tristate "Nuvoton W83773G"
depends on I2C
+ select REGMAP_I2C
help
If you say yes here you get support for the Nuvoton W83773G hardware
monitoring chip.
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index e4f9f7ce92fa..f9abeeeead9e 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -640,7 +640,7 @@ static const struct hwmon_channel_info ntc_chip = {
};
static const u32 ntc_temp_config[] = {
- HWMON_T_INPUT, HWMON_T_TYPE,
+ HWMON_T_INPUT | HWMON_T_TYPE,
0
};
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index b91a80abf724..4679acb4918e 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -890,6 +890,8 @@ static int occ_setup_sensor_attrs(struct occ *occ)
s++;
}
}
+
+ s = (sensors->power.num_sensors * 4) + 1;
} else {
for (i = 0; i < sensors->power.num_sensors; ++i) {
s = i + 1;
@@ -918,11 +920,11 @@ static int occ_setup_sensor_attrs(struct occ *occ)
show_power, NULL, 3, i);
attr++;
}
- }
- if (sensors->caps.num_sensors >= 1) {
s = sensors->power.num_sensors + 1;
+ }
+ if (sensors->caps.num_sensors >= 1) {
snprintf(attr->name, sizeof(attr->name), "power%d_label", s);
attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL,
0, 0);
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index f2c681971201..f8979abb9a19 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -131,6 +131,7 @@ config I2C_I801
Cannon Lake (PCH)
Cedar Fork (PCH)
Ice Lake (PCH)
+ Comet Lake (PCH)
This driver can also be built as a module. If so, the module
will be called i2c-i801.
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index c91e145ef5a5..679c6c41f64b 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -71,6 +71,7 @@
* Cannon Lake-LP (PCH) 0x9da3 32 hard yes yes yes
* Cedar Fork (PCH) 0x18df 32 hard yes yes yes
* Ice Lake-LP (PCH) 0x34a3 32 hard yes yes yes
+ * Comet Lake (PCH) 0x02a3 32 hard yes yes yes
*
* Features supported by this driver:
* Software PEC no
@@ -240,6 +241,7 @@
#define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS 0xa223
#define PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS 0xa2a3
#define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS 0xa323
+#define PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS 0x02a3
struct i801_mux_config {
char *gpio_chip;
@@ -1038,6 +1040,7 @@ static const struct pci_device_id i801_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS) },
{ 0, }
};
@@ -1534,6 +1537,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS:
case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS:
+ case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS:
priv->features |= FEATURE_I2C_BLOCK_READ;
priv->features |= FEATURE_IRQ;
priv->features |= FEATURE_SMBUS_PEC;
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 42fed40198a0..c0c3043b5d61 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1169,11 +1169,13 @@ static int i2c_imx_probe(struct platform_device *pdev)
/* Init DMA config if supported */
ret = i2c_imx_dma_request(i2c_imx, phy_addr);
if (ret < 0)
- goto clk_notifier_unregister;
+ goto del_adapter;
dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n");
return 0; /* Return OK */
+del_adapter:
+ i2c_del_adapter(&i2c_imx->adapter);
clk_notifier_unregister:
clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
rpm_disable:
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 21cb088d6687..f7cdd2ab7f11 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3169,21 +3169,24 @@ static void amd_iommu_get_resv_regions(struct device *dev,
return;
list_for_each_entry(entry, &amd_iommu_unity_map, list) {
+ int type, prot = 0;
size_t length;
- int prot = 0;
if (devid < entry->devid_start || devid > entry->devid_end)
continue;
+ type = IOMMU_RESV_DIRECT;
length = entry->address_end - entry->address_start;
if (entry->prot & IOMMU_PROT_IR)
prot |= IOMMU_READ;
if (entry->prot & IOMMU_PROT_IW)
prot |= IOMMU_WRITE;
+ if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE)
+ /* Exclusion range */
+ type = IOMMU_RESV_RESERVED;
region = iommu_alloc_resv_region(entry->address_start,
- length, prot,
- IOMMU_RESV_DIRECT);
+ length, prot, type);
if (!region) {
dev_err(dev, "Out of memory allocating dm-regions\n");
return;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index f773792d77fd..1b1378619fc9 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2013,6 +2013,9 @@ static int __init init_unity_map_range(struct ivmd_header *m)
if (e == NULL)
return -ENOMEM;
+ if (m->flags & IVMD_FLAG_EXCL_RANGE)
+ init_exclusion_range(m);
+
switch (m->type) {
default:
kfree(e);
@@ -2059,9 +2062,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
while (p < end) {
m = (struct ivmd_header *)p;
- if (m->flags & IVMD_FLAG_EXCL_RANGE)
- init_exclusion_range(m);
- else if (m->flags & IVMD_FLAG_UNITY_MAP)
+ if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
init_unity_map_range(m);
p += m->length;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index eae0741f72dc..87965e4d9647 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -374,6 +374,8 @@
#define IOMMU_PROT_IR 0x01
#define IOMMU_PROT_IW 0x02
+#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE (1 << 2)
+
/* IOMMU capabilities */
#define IOMMU_CAP_IOTLB 24
#define IOMMU_CAP_NPCACHE 26
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index f101afc315ab..9a8a8870e267 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -160,6 +160,14 @@
#define ARM_V7S_TCR_PD1 BIT(5)
+#ifdef CONFIG_ZONE_DMA32
+#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
+#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
+#else
+#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
+#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
+#endif
+
typedef u32 arm_v7s_iopte;
static bool selftest_running;
@@ -197,13 +205,16 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
void *table = NULL;
if (lvl == 1)
- table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
+ table = (void *)__get_free_pages(
+ __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
else if (lvl == 2)
- table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
+ table = kmem_cache_zalloc(data->l2_tables, gfp);
phys = virt_to_phys(table);
- if (phys != (arm_v7s_iopte)phys)
+ if (phys != (arm_v7s_iopte)phys) {
/* Doesn't fit in PTE */
+ dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
goto out_free;
+ }
if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
@@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
ARM_V7S_TABLE_SIZE(2),
ARM_V7S_TABLE_SIZE(2),
- SLAB_CACHE_DMA, NULL);
+ ARM_V7S_TABLE_SLAB_FLAGS, NULL);
if (!data->l2_tables)
goto out_free_data;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 33a982e33716..109de67d5d72 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1105,10 +1105,12 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
- dev_warn(dev,
- "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
- iommu_def_domain_type);
dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
+ if (dom) {
+ dev_warn(dev,
+ "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
+ iommu_def_domain_type);
+ }
}
group->default_domain = dom;
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 4d85645c87f7..0928fd1f0e0c 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -4365,7 +4365,8 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev,
if (m->clock2)
test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip);
- if (ent->device == 0xB410) {
+ if (ent->vendor == PCI_VENDOR_ID_DIGIUM &&
+ ent->device == PCI_DEVICE_ID_DIGIUM_HFC4S) {
test_and_set_bit(HFC_CHIP_B410P, &hc->chip);
test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip);
test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip);
diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c
index 7fea18b0c15d..7cb4d685a1f1 100644
--- a/drivers/leds/leds-pca9532.c
+++ b/drivers/leds/leds-pca9532.c
@@ -513,6 +513,7 @@ static int pca9532_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
int devid;
+ const struct of_device_id *of_id;
struct pca9532_data *data = i2c_get_clientdata(client);
struct pca9532_platform_data *pca9532_pdata =
dev_get_platdata(&client->dev);
@@ -528,8 +529,11 @@ static int pca9532_probe(struct i2c_client *client,
dev_err(&client->dev, "no platform data\n");
return -EINVAL;
}
- devid = (int)(uintptr_t)of_match_device(
- of_pca9532_leds_match, &client->dev)->data;
+ of_id = of_match_device(of_pca9532_leds_match,
+ &client->dev);
+ if (unlikely(!of_id))
+ return -EINVAL;
+ devid = (int)(uintptr_t) of_id->data;
} else {
devid = id->driver_data;
}
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index 3dd3ed46d473..136f86a1627d 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -122,7 +122,8 @@ static ssize_t device_name_store(struct device *dev,
trigger_data->net_dev = NULL;
}
- strncpy(trigger_data->device_name, buf, size);
+ memcpy(trigger_data->device_name, buf, size);
+ trigger_data->device_name[size] = 0;
if (size > 0 && trigger_data->device_name[size - 1] == '\n')
trigger_data->device_name[size - 1] = 0;
@@ -301,11 +302,11 @@ static int netdev_trig_notify(struct notifier_block *nb,
container_of(nb, struct led_netdev_data, notifier);
if (evt != NETDEV_UP && evt != NETDEV_DOWN && evt != NETDEV_CHANGE
- && evt != NETDEV_REGISTER && evt != NETDEV_UNREGISTER
- && evt != NETDEV_CHANGENAME)
+ && evt != NETDEV_REGISTER && evt != NETDEV_UNREGISTER)
return NOTIFY_DONE;
- if (strcmp(dev->name, trigger_data->device_name))
+ if (!(dev == trigger_data->net_dev ||
+ (evt == NETDEV_REGISTER && !strcmp(dev->name, trigger_data->device_name))))
return NOTIFY_DONE;
cancel_delayed_work_sync(&trigger_data->work);
@@ -320,12 +321,9 @@ static int netdev_trig_notify(struct notifier_block *nb,
dev_hold(dev);
trigger_data->net_dev = dev;
break;
- case NETDEV_CHANGENAME:
case NETDEV_UNREGISTER:
- if (trigger_data->net_dev) {
- dev_put(trigger_data->net_dev);
- trigger_data->net_dev = NULL;
- }
+ dev_put(trigger_data->net_dev);
+ trigger_data->net_dev = NULL;
break;
case NETDEV_UP:
case NETDEV_CHANGE:
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 95c6d86ab5e8..c4ef1fceead6 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -115,6 +115,7 @@ struct mapped_device {
struct srcu_struct io_barrier;
};
+void disable_discard(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
void disable_write_zeroes(struct mapped_device *md);
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index b53f30f16b4d..4b76f84424c3 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -36,7 +36,7 @@ struct dm_device {
struct list_head list;
};
-const char *dm_allowed_targets[] __initconst = {
+const char * const dm_allowed_targets[] __initconst = {
"crypt",
"delay",
"linear",
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index d57d997a52c8..7c678f50aaa3 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -913,7 +913,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
{
return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
- range2->logical_sector + range2->n_sectors > range2->logical_sector;
+ range1->logical_sector + range1->n_sectors > range2->logical_sector;
}
static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
@@ -959,8 +959,6 @@ static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity
struct dm_integrity_range *last_range =
list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
struct task_struct *last_range_task;
- if (!ranges_overlap(range, last_range))
- break;
last_range_task = last_range->task;
list_del(&last_range->wait_entry);
if (!add_new_range(ic, last_range, false)) {
@@ -3185,7 +3183,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
journal_watermark = val;
else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
sync_msec = val;
- else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) {
+ else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
if (ic->meta_dev) {
dm_put_device(ti, ic->meta_dev);
ic->meta_dev = NULL;
@@ -3204,17 +3202,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
ic->sectors_per_block = val >> SECTOR_SHIFT;
- } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
+ } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument");
if (r)
goto bad;
- } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
+ } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
"Invalid journal_crypt argument");
if (r)
goto bad;
- } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
+ } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
"Invalid journal_mac argument");
if (r)
@@ -3616,7 +3614,7 @@ static struct target_type integrity_target = {
.io_hints = dm_integrity_io_hints,
};
-int __init dm_integrity_init(void)
+static int __init dm_integrity_init(void)
{
int r;
@@ -3635,7 +3633,7 @@ int __init dm_integrity_init(void)
return r;
}
-void dm_integrity_exit(void)
+static void __exit dm_integrity_exit(void)
{
dm_unregister_target(&integrity_target);
kmem_cache_destroy(journal_io_cache);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 09773636602d..b66745bd08bb 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -222,11 +222,14 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped)
}
if (unlikely(error == BLK_STS_TARGET)) {
- if (req_op(clone) == REQ_OP_WRITE_SAME &&
- !clone->q->limits.max_write_same_sectors)
+ if (req_op(clone) == REQ_OP_DISCARD &&
+ !clone->q->limits.max_discard_sectors)
+ disable_discard(tio->md);
+ else if (req_op(clone) == REQ_OP_WRITE_SAME &&
+ !clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
- if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
- !clone->q->limits.max_write_zeroes_sectors)
+ else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
+ !clone->q->limits.max_write_zeroes_sectors)
disable_write_zeroes(tio->md);
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ba9481f1bf3c..cde3b49b2a91 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1844,6 +1844,36 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
return true;
}
+static int device_requires_stable_pages(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && bdi_cap_stable_pages_required(q->backing_dev_info);
+}
+
+/*
+ * If any underlying device requires stable pages, a table must require
+ * them as well. Only targets that support iterate_devices are considered:
+ * don't want error, zero, etc to require stable pages.
+ */
+static bool dm_table_requires_stable_pages(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i;
+
+ for (i = 0; i < dm_table_get_num_targets(t); i++) {
+ ti = dm_table_get_target(t, i);
+
+ if (ti->type->iterate_devices &&
+ ti->type->iterate_devices(ti, device_requires_stable_pages, NULL))
+ return true;
+ }
+
+ return false;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1897,6 +1927,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
dm_table_verify_integrity(t);
/*
+ * Some devices don't use blk_integrity but still want stable pages
+ * because they do their own checksumming.
+ */
+ if (dm_table_requires_stable_pages(t))
+ q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
+ else
+ q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
+
+ /*
* Determine whether or not this queue's I/O timings contribute
* to the entropy pool, Only request-based targets use this.
* Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 68d24056d0b1..043f0761e4a0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -945,6 +945,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
}
}
+void disable_discard(struct mapped_device *md)
+{
+ struct queue_limits *limits = dm_get_queue_limits(md);
+
+ /* device doesn't really support DISCARD, disable it */
+ limits->max_discard_sectors = 0;
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
+}
+
void disable_write_same(struct mapped_device *md)
{
struct queue_limits *limits = dm_get_queue_limits(md);
@@ -970,11 +979,14 @@ static void clone_endio(struct bio *bio)
dm_endio_fn endio = tio->ti->type->end_io;
if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
- if (bio_op(bio) == REQ_OP_WRITE_SAME &&
- !bio->bi_disk->queue->limits.max_write_same_sectors)
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !bio->bi_disk->queue->limits.max_discard_sectors)
+ disable_discard(md);
+ else if (bio_op(bio) == REQ_OP_WRITE_SAME &&
+ !bio->bi_disk->queue->limits.max_write_same_sectors)
disable_write_same(md);
- if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
disable_write_zeroes(md);
}
@@ -1042,15 +1054,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
return -EINVAL;
}
- /*
- * BIO based queue uses its own splitting. When multipage bvecs
- * is switched on, size of the incoming bio may be too big to
- * be handled in some targets, such as crypt.
- *
- * When these targets are ready for the big bio, we can remove
- * the limit.
- */
- ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE);
+ ti->max_io_len = (uint32_t) len;
return 0;
}
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 0ce2d8dfc5f1..26ad6468d13a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1246,7 +1246,7 @@ config MFD_STA2X11
config MFD_SUN6I_PRCM
bool "Allwinner A31 PRCM controller"
- depends on ARCH_SUNXI
+ depends on ARCH_SUNXI || COMPILE_TEST
select MFD_CORE
help
Support for the PRCM (Power/Reset/Clock Management) unit available
diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c
index 69df27769c21..43ac71691fe4 100644
--- a/drivers/mfd/sprd-sc27xx-spi.c
+++ b/drivers/mfd/sprd-sc27xx-spi.c
@@ -53,67 +53,67 @@ static const struct sprd_pmic_data sc2731_data = {
static const struct mfd_cell sprd_pmic_devs[] = {
{
.name = "sc27xx-wdt",
- .of_compatible = "sprd,sc27xx-wdt",
+ .of_compatible = "sprd,sc2731-wdt",
}, {
.name = "sc27xx-rtc",
- .of_compatible = "sprd,sc27xx-rtc",
+ .of_compatible = "sprd,sc2731-rtc",
}, {
.name = "sc27xx-charger",
- .of_compatible = "sprd,sc27xx-charger",
+ .of_compatible = "sprd,sc2731-charger",
}, {
.name = "sc27xx-chg-timer",
- .of_compatible = "sprd,sc27xx-chg-timer",
+ .of_compatible = "sprd,sc2731-chg-timer",
}, {
.name = "sc27xx-fast-chg",
- .of_compatible = "sprd,sc27xx-fast-chg",
+ .of_compatible = "sprd,sc2731-fast-chg",
}, {
.name = "sc27xx-chg-wdt",
- .of_compatible = "sprd,sc27xx-chg-wdt",
+ .of_compatible = "sprd,sc2731-chg-wdt",
}, {
.name = "sc27xx-typec",
- .of_compatible = "sprd,sc27xx-typec",
+ .of_compatible = "sprd,sc2731-typec",
}, {
.name = "sc27xx-flash",
- .of_compatible = "sprd,sc27xx-flash",
+ .of_compatible = "sprd,sc2731-flash",
}, {
.name = "sc27xx-eic",
- .of_compatible = "sprd,sc27xx-eic",
+ .of_compatible = "sprd,sc2731-eic",
}, {
.name = "sc27xx-efuse",
- .of_compatible = "sprd,sc27xx-efuse",
+ .of_compatible = "sprd,sc2731-efuse",
}, {
.name = "sc27xx-thermal",
- .of_compatible = "sprd,sc27xx-thermal",
+ .of_compatible = "sprd,sc2731-thermal",
}, {
.name = "sc27xx-adc",
- .of_compatible = "sprd,sc27xx-adc",
+ .of_compatible = "sprd,sc2731-adc",
}, {
.name = "sc27xx-audio-codec",
- .of_compatible = "sprd,sc27xx-audio-codec",
+ .of_compatible = "sprd,sc2731-audio-codec",
}, {
.name = "sc27xx-regulator",
- .of_compatible = "sprd,sc27xx-regulator",
+ .of_compatible = "sprd,sc2731-regulator",
}, {
.name = "sc27xx-vibrator",
- .of_compatible = "sprd,sc27xx-vibrator",
+ .of_compatible = "sprd,sc2731-vibrator",
}, {
.name = "sc27xx-keypad-led",
- .of_compatible = "sprd,sc27xx-keypad-led",
+ .of_compatible = "sprd,sc2731-keypad-led",
}, {
.name = "sc27xx-bltc",
- .of_compatible = "sprd,sc27xx-bltc",
+ .of_compatible = "sprd,sc2731-bltc",
}, {
.name = "sc27xx-fgu",
- .of_compatible = "sprd,sc27xx-fgu",
+ .of_compatible = "sprd,sc2731-fgu",
}, {
.name = "sc27xx-7sreset",
- .of_compatible = "sprd,sc27xx-7sreset",
+ .of_compatible = "sprd,sc2731-7sreset",
}, {
.name = "sc27xx-poweroff",
- .of_compatible = "sprd,sc27xx-poweroff",
+ .of_compatible = "sprd,sc2731-poweroff",
}, {
.name = "sc27xx-syscon",
- .of_compatible = "sprd,sc27xx-syscon",
+ .of_compatible = "sprd,sc2731-syscon",
},
};
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 299016bc46d9..104477b512a2 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -1245,6 +1245,28 @@ free:
return status;
}
+static int __maybe_unused twl_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ if (client->irq)
+ disable_irq(client->irq);
+
+ return 0;
+}
+
+static int __maybe_unused twl_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ if (client->irq)
+ enable_irq(client->irq);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(twl_dev_pm_ops, twl_suspend, twl_resume);
+
static const struct i2c_device_id twl_ids[] = {
{ "twl4030", TWL4030_VAUX2 }, /* "Triton 2" */
{ "twl5030", 0 }, /* T2 updated */
@@ -1262,6 +1284,7 @@ static const struct i2c_device_id twl_ids[] = {
/* One Client Driver , 4 Clients */
static struct i2c_driver twl_driver = {
.driver.name = DRIVER_NAME,
+ .driver.pm = &twl_dev_pm_ops,
.id_table = twl_ids,
.probe = twl_probe,
.remove = twl_remove,
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 3525236ed8d9..19c84214a7ea 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref)
/* We also need to update CI for internal queues */
if (cs->submitted) {
+ int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
+
+ WARN_ONCE((cs_cnt < 0),
+ "hl%d: error in CS active cnt %d\n",
+ hdev->id, cs_cnt);
+
hl_int_hw_queue_update_ci(cs);
spin_lock(&hdev->hw_queues_mirror_lock);
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index a53c12aff6ad..974a87789bd8 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -232,6 +232,7 @@ static int vm_show(struct seq_file *s, void *data)
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
enum vm_type_t *vm_type;
bool once = true;
+ u64 j;
int i;
if (!dev_entry->hdev->mmu_enable)
@@ -260,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
} else {
phys_pg_pack = hnode->ptr;
seq_printf(s,
- " 0x%-14llx %-10u %-4u\n",
+ " 0x%-14llx %-10llu %-4u\n",
hnode->vaddr, phys_pg_pack->total_size,
phys_pg_pack->handle);
}
@@ -282,9 +283,9 @@ static int vm_show(struct seq_file *s, void *data)
phys_pg_pack->page_size);
seq_puts(s, " physical address\n");
seq_puts(s, "---------------------\n");
- for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+ for (j = 0 ; j < phys_pg_pack->npages ; j++) {
seq_printf(s, " 0x%-14llx\n",
- phys_pg_pack->pages[i]);
+ phys_pg_pack->pages[j]);
}
}
spin_unlock(&vm->idr_lock);
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index de46aa6ed154..77d51be66c7e 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -11,6 +11,8 @@
#include <linux/sched/signal.h>
#include <linux/hwmon.h>
+#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
+
bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
{
if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
@@ -216,6 +218,7 @@ static int device_early_init(struct hl_device *hdev)
spin_lock_init(&hdev->hw_queues_mirror_lock);
atomic_set(&hdev->in_reset, 0);
atomic_set(&hdev->fd_open_cnt, 0);
+ atomic_set(&hdev->cs_active_cnt, 0);
return 0;
@@ -413,6 +416,27 @@ int hl_device_suspend(struct hl_device *hdev)
pci_save_state(hdev->pdev);
+ /* Block future CS/VM/JOB completion operations */
+ rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ if (rc) {
+ dev_err(hdev->dev, "Can't suspend while in reset\n");
+ return -EIO;
+ }
+
+ /* This blocks all other stuff that is not blocked by in_reset */
+ hdev->disabled = true;
+
+ /*
+ * Flush anyone that is inside the critical section of enqueue
+ * jobs to the H/W
+ */
+ hdev->asic_funcs->hw_queues_lock(hdev);
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ /* Flush processes that are sending message to CPU */
+ mutex_lock(&hdev->send_cpu_message_lock);
+ mutex_unlock(&hdev->send_cpu_message_lock);
+
rc = hdev->asic_funcs->suspend(hdev);
if (rc)
dev_err(hdev->dev,
@@ -440,21 +464,38 @@ int hl_device_resume(struct hl_device *hdev)
pci_set_power_state(hdev->pdev, PCI_D0);
pci_restore_state(hdev->pdev);
- rc = pci_enable_device(hdev->pdev);
+ rc = pci_enable_device_mem(hdev->pdev);
if (rc) {
dev_err(hdev->dev,
"Failed to enable PCI device in resume\n");
return rc;
}
+ pci_set_master(hdev->pdev);
+
rc = hdev->asic_funcs->resume(hdev);
if (rc) {
- dev_err(hdev->dev,
- "Failed to enable PCI access from device CPU\n");
- return rc;
+ dev_err(hdev->dev, "Failed to resume device after suspend\n");
+ goto disable_device;
+ }
+
+
+ hdev->disabled = false;
+ atomic_set(&hdev->in_reset, 0);
+
+ rc = hl_device_reset(hdev, true, false);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to reset device during resume\n");
+ goto disable_device;
}
return 0;
+
+disable_device:
+ pci_clear_master(hdev->pdev);
+ pci_disable_device(hdev->pdev);
+
+ return rc;
}
static void hl_device_hard_reset_pending(struct work_struct *work)
@@ -462,9 +503,16 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
struct hl_device_reset_work *device_reset_work =
container_of(work, struct hl_device_reset_work, reset_work);
struct hl_device *hdev = device_reset_work->hdev;
- u16 pending_cnt = HL_PENDING_RESET_PER_SEC;
+ u16 pending_total, pending_cnt;
struct task_struct *task = NULL;
+ if (hdev->pldm)
+ pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
+ else
+ pending_total = HL_PENDING_RESET_PER_SEC;
+
+ pending_cnt = pending_total;
+
/* Flush all processes that are inside hl_open */
mutex_lock(&hdev->fd_open_cnt_lock);
@@ -489,6 +537,19 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
}
}
+ pending_cnt = pending_total;
+
+ while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
+
+ pending_cnt--;
+
+ ssleep(1);
+ }
+
+ if (atomic_read(&hdev->fd_open_cnt))
+ dev_crit(hdev->dev,
+ "Going to hard reset with open user contexts\n");
+
mutex_unlock(&hdev->fd_open_cnt_lock);
hl_device_reset(hdev, true, true);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 238dd57c541b..ea979ebd62fb 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1201,15 +1201,6 @@ static int goya_stop_external_queues(struct hl_device *hdev)
return retval;
}
-static void goya_resume_external_queues(struct hl_device *hdev)
-{
- WREG32(mmDMA_QM_0_GLBL_CFG1, 0);
- WREG32(mmDMA_QM_1_GLBL_CFG1, 0);
- WREG32(mmDMA_QM_2_GLBL_CFG1, 0);
- WREG32(mmDMA_QM_3_GLBL_CFG1, 0);
- WREG32(mmDMA_QM_4_GLBL_CFG1, 0);
-}
-
/*
* goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
*
@@ -2178,36 +2169,6 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
return retval;
}
-static void goya_resume_internal_queues(struct hl_device *hdev)
-{
- WREG32(mmMME_QM_GLBL_CFG1, 0);
- WREG32(mmMME_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC0_QM_GLBL_CFG1, 0);
- WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC1_QM_GLBL_CFG1, 0);
- WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC2_QM_GLBL_CFG1, 0);
- WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC3_QM_GLBL_CFG1, 0);
- WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC4_QM_GLBL_CFG1, 0);
- WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC5_QM_GLBL_CFG1, 0);
- WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC6_QM_GLBL_CFG1, 0);
- WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0);
-
- WREG32(mmTPC7_QM_GLBL_CFG1, 0);
- WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0);
-}
-
static void goya_dma_stall(struct hl_device *hdev)
{
WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
@@ -2905,20 +2866,6 @@ int goya_suspend(struct hl_device *hdev)
{
int rc;
- rc = goya_stop_internal_queues(hdev);
-
- if (rc) {
- dev_err(hdev->dev, "failed to stop internal queues\n");
- return rc;
- }
-
- rc = goya_stop_external_queues(hdev);
-
- if (rc) {
- dev_err(hdev->dev, "failed to stop external queues\n");
- return rc;
- }
-
rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
if (rc)
dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
@@ -2928,15 +2875,7 @@ int goya_suspend(struct hl_device *hdev)
int goya_resume(struct hl_device *hdev)
{
- int rc;
-
- goya_resume_external_queues(hdev);
- goya_resume_internal_queues(hdev);
-
- rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
- if (rc)
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
- return rc;
+ return goya_init_iatu(hdev);
}
static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
@@ -3070,7 +3009,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
*dma_handle = hdev->asic_prop.sram_base_address;
- base = hdev->pcie_bar[SRAM_CFG_BAR_ID];
+ base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
switch (queue_id) {
case GOYA_QUEUE_ID_MME:
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index a7c95e9f9b9a..a8ee52c880cd 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -793,11 +793,11 @@ struct hl_vm_hash_node {
* struct hl_vm_phys_pg_pack - physical page pack.
* @vm_type: describes the type of the virtual area descriptor.
* @pages: the physical page array.
+ * @npages: num physical pages in the pack.
+ * @total_size: total size of all the pages in this list.
* @mapping_cnt: number of shared mappings.
* @asid: the context related to this list.
- * @npages: num physical pages in the pack.
* @page_size: size of each page in the pack.
- * @total_size: total size of all the pages in this list.
* @flags: HL_MEM_* flags related to this list.
* @handle: the provided handle related to this list.
* @offset: offset from the first page.
@@ -807,11 +807,11 @@ struct hl_vm_hash_node {
struct hl_vm_phys_pg_pack {
enum vm_type_t vm_type; /* must be first */
u64 *pages;
+ u64 npages;
+ u64 total_size;
atomic_t mapping_cnt;
u32 asid;
- u32 npages;
u32 page_size;
- u32 total_size;
u32 flags;
u32 handle;
u32 offset;
@@ -1056,13 +1056,15 @@ struct hl_device_reset_work {
* @cb_pool_lock: protects the CB pool.
* @user_ctx: current user context executing.
* @dram_used_mem: current DRAM memory consumption.
- * @in_reset: is device in reset flow.
- * @curr_pll_profile: current PLL profile.
- * @fd_open_cnt: number of open user processes.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
* value is saved so in case of hard-reset, KMD will restore this
* value and update the F/W after the re-initialization
+ * @in_reset: is device in reset flow.
+ * @curr_pll_profile: current PLL profile.
+ * @fd_open_cnt: number of open user processes.
+ * @cs_active_cnt: number of active command submissions on this device (active
+ * means already in H/W queues)
* @major: habanalabs KMD major.
* @high_pll: high PLL profile frequency.
* @soft_reset_cnt: number of soft reset since KMD loading.
@@ -1128,11 +1130,12 @@ struct hl_device {
struct hl_ctx *user_ctx;
atomic64_t dram_used_mem;
+ u64 timeout_jiffies;
+ u64 max_power;
atomic_t in_reset;
atomic_t curr_pll_profile;
atomic_t fd_open_cnt;
- u64 timeout_jiffies;
- u64 max_power;
+ atomic_t cs_active_cnt;
u32 major;
u32 high_pll;
u32 soft_reset_cnt;
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 67bece26417c..ef3bb6951360 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -370,12 +370,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
spin_unlock(&hdev->hw_queues_mirror_lock);
}
- list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) {
+ atomic_inc(&hdev->cs_active_cnt);
+
+ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
if (job->ext_queue)
ext_hw_queue_schedule_job(job);
else
int_hw_queue_schedule_job(job);
- }
cs->submitted = true;
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 3a12fd1a5274..ce1fda40a8b8 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -56,9 +56,9 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
struct hl_vm_phys_pg_pack *phys_pg_pack;
- u64 paddr = 0;
- u32 total_size, num_pgs, num_curr_pgs, page_size, page_shift;
- int handle, rc, i;
+ u64 paddr = 0, total_size, num_pgs, i;
+ u32 num_curr_pgs, page_size, page_shift;
+ int handle, rc;
bool contiguous;
num_curr_pgs = 0;
@@ -73,7 +73,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
if (!paddr) {
dev_err(hdev->dev,
- "failed to allocate %u huge contiguous pages\n",
+ "failed to allocate %llu huge contiguous pages\n",
num_pgs);
return -ENOMEM;
}
@@ -93,7 +93,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
phys_pg_pack->flags = args->flags;
phys_pg_pack->contiguous = contiguous;
- phys_pg_pack->pages = kcalloc(num_pgs, sizeof(u64), GFP_KERNEL);
+ phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
if (!phys_pg_pack->pages) {
rc = -ENOMEM;
goto pages_arr_err;
@@ -148,7 +148,7 @@ page_err:
gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
page_size);
- kfree(phys_pg_pack->pages);
+ kvfree(phys_pg_pack->pages);
pages_arr_err:
kfree(phys_pg_pack);
pages_pack_err:
@@ -267,7 +267,7 @@ static void free_phys_pg_pack(struct hl_device *hdev,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_vm *vm = &hdev->vm;
- int i;
+ u64 i;
if (!phys_pg_pack->created_from_userptr) {
if (phys_pg_pack->contiguous) {
@@ -288,7 +288,7 @@ static void free_phys_pg_pack(struct hl_device *hdev,
}
}
- kfree(phys_pg_pack->pages);
+ kvfree(phys_pg_pack->pages);
kfree(phys_pg_pack);
}
@@ -519,7 +519,7 @@ static inline int add_va_block(struct hl_device *hdev,
* - Return the start address of the virtual block
*/
static u64 get_va_block(struct hl_device *hdev,
- struct hl_va_range *va_range, u32 size, u64 hint_addr,
+ struct hl_va_range *va_range, u64 size, u64 hint_addr,
bool is_userptr)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
@@ -577,7 +577,8 @@ static u64 get_va_block(struct hl_device *hdev,
}
if (!new_va_block) {
- dev_err(hdev->dev, "no available va block for size %u\n", size);
+ dev_err(hdev->dev, "no available va block for size %llu\n",
+ size);
goto out;
}
@@ -648,8 +649,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct scatterlist *sg;
dma_addr_t dma_addr;
- u64 page_mask;
- u32 npages, total_npages, page_size = PAGE_SIZE;
+ u64 page_mask, total_npages;
+ u32 npages, page_size = PAGE_SIZE;
bool first = true, is_huge_page_opt = true;
int rc, i, j;
@@ -691,7 +692,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
page_mask = ~(((u64) page_size) - 1);
- phys_pg_pack->pages = kcalloc(total_npages, sizeof(u64), GFP_KERNEL);
+ phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
+ GFP_KERNEL);
if (!phys_pg_pack->pages) {
rc = -ENOMEM;
goto page_pack_arr_mem_err;
@@ -750,9 +752,9 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
struct hl_vm_phys_pg_pack *phys_pg_pack)
{
struct hl_device *hdev = ctx->hdev;
- u64 next_vaddr = vaddr, paddr;
+ u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
u32 page_size = phys_pg_pack->page_size;
- int i, rc = 0, mapped_pg_cnt = 0;
+ int rc = 0;
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
@@ -764,7 +766,7 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
if (rc) {
dev_err(hdev->dev,
- "map failed for handle %u, npages: %d, mapped: %d",
+ "map failed for handle %u, npages: %llu, mapped: %llu",
phys_pg_pack->handle, phys_pg_pack->npages,
mapped_pg_cnt);
goto err;
@@ -985,10 +987,10 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
struct hl_vm_hash_node *hnode = NULL;
struct hl_userptr *userptr = NULL;
enum vm_type_t *vm_type;
- u64 next_vaddr;
+ u64 next_vaddr, i;
u32 page_size;
bool is_userptr;
- int i, rc;
+ int rc;
/* protect from double entrance */
mutex_lock(&ctx->mem_hash_lock);
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 2f2e99cb2743..3a5a2cec8305 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -832,7 +832,7 @@ err:
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{
struct hl_device *hdev = ctx->hdev;
- u64 real_virt_addr;
+ u64 real_virt_addr, real_phys_addr;
u32 real_page_size, npages;
int i, rc, mapped_cnt = 0;
@@ -857,14 +857,16 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
npages = page_size / real_page_size;
real_virt_addr = virt_addr;
+ real_phys_addr = phys_addr;
for (i = 0 ; i < npages ; i++) {
- rc = _hl_mmu_map(ctx, real_virt_addr, phys_addr,
+ rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
real_page_size);
if (rc)
goto err;
real_virt_addr += real_page_size;
+ real_phys_addr += real_page_size;
mapped_cnt++;
}
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 72428b6bfc47..7b7286b4d81e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1876,7 +1876,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
continue;
}
- if (time_after(jiffies, timeo) && !chip_ready(map, adr))
+ /*
+ * We check "time_after" and "!chip_good" before checking "chip_good" to avoid
+ * the failure due to scheduling.
+ */
+ if (time_after(jiffies, timeo) && !chip_good(map, adr, datum))
break;
if (chip_good(map, adr, datum)) {
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 5e4ca082cfcd..7a96d168efc4 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -216,8 +216,8 @@ config GENEVE
config GTP
tristate "GPRS Tunneling Protocol datapath (GTP-U)"
- depends on INET && NET_UDP_TUNNEL
- select NET_IP_TUNNEL
+ depends on INET
+ select NET_UDP_TUNNEL
---help---
This allows one to create gtp virtual interfaces that provide
the GPRS Tunneling Protocol datapath (GTP-U). This tunneling protocol
diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
index 2f120b2ffef0..4985268e2273 100644
--- a/drivers/net/bonding/bond_sysfs_slave.c
+++ b/drivers/net/bonding/bond_sysfs_slave.c
@@ -55,7 +55,9 @@ static SLAVE_ATTR_RO(link_failure_count);
static ssize_t perm_hwaddr_show(struct slave *slave, char *buf)
{
- return sprintf(buf, "%pM\n", slave->perm_hwaddr);
+ return sprintf(buf, "%*phC\n",
+ slave->dev->addr_len,
+ slave->perm_hwaddr);
}
static SLAVE_ATTR_RO(perm_hwaddr);
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index dce84a2a65c7..c44b2822e4dd 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -427,18 +427,22 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
return 0;
lane = mv88e6390x_serdes_get_lane(chip, port);
- if (lane < 0)
+ if (lane < 0 && lane != -ENODEV)
return lane;
- if (chip->ports[port].serdes_irq) {
- err = mv88e6390_serdes_irq_disable(chip, port, lane);
+ if (lane >= 0) {
+ if (chip->ports[port].serdes_irq) {
+ err = mv88e6390_serdes_irq_disable(chip, port, lane);
+ if (err)
+ return err;
+ }
+
+ err = mv88e6390x_serdes_power(chip, port, false);
if (err)
return err;
}
- err = mv88e6390x_serdes_power(chip, port, false);
- if (err)
- return err;
+ chip->ports[port].cmode = 0;
if (cmode) {
err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
@@ -452,6 +456,12 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
if (err)
return err;
+ chip->ports[port].cmode = cmode;
+
+ lane = mv88e6390x_serdes_get_lane(chip, port);
+ if (lane < 0)
+ return lane;
+
err = mv88e6390x_serdes_power(chip, port, true);
if (err)
return err;
@@ -463,8 +473,6 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
}
}
- chip->ports[port].cmode = cmode;
-
return 0;
}
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 576b37d12a63..c4fa400efdcc 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -481,6 +481,155 @@ qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask);
}
+static u32
+qca8k_port_to_phy(int port)
+{
+ /* From Andrew Lunn:
+ * Port 0 has no internal phy.
+ * Port 1 has an internal PHY at MDIO address 0.
+ * Port 2 has an internal PHY at MDIO address 1.
+ * ...
+ * Port 5 has an internal PHY at MDIO address 4.
+ * Port 6 has no internal PHY.
+ */
+
+ return port - 1;
+}
+
+static int
+qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
+{
+ u32 phy, val;
+
+ if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
+ return -EINVAL;
+
+ /* callee is responsible for not passing bad ports,
+ * but we still would like to make spills impossible.
+ */
+ phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
+ val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN |
+ QCA8K_MDIO_MASTER_WRITE | QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
+ QCA8K_MDIO_MASTER_REG_ADDR(regnum) |
+ QCA8K_MDIO_MASTER_DATA(data);
+
+ qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+
+ return qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+ QCA8K_MDIO_MASTER_BUSY);
+}
+
+static int
+qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
+{
+ u32 phy, val;
+
+ if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
+ return -EINVAL;
+
+ /* callee is responsible for not passing bad ports,
+ * but we still would like to make spills impossible.
+ */
+ phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
+ val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN |
+ QCA8K_MDIO_MASTER_READ | QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
+ QCA8K_MDIO_MASTER_REG_ADDR(regnum);
+
+ qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+
+ if (qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+ QCA8K_MDIO_MASTER_BUSY))
+ return -ETIMEDOUT;
+
+ val = (qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL) &
+ QCA8K_MDIO_MASTER_DATA_MASK);
+
+ return val;
+}
+
+static int
+qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data)
+{
+ struct qca8k_priv *priv = ds->priv;
+
+ return qca8k_mdio_write(priv, port, regnum, data);
+}
+
+static int
+qca8k_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+ struct qca8k_priv *priv = ds->priv;
+ int ret;
+
+ ret = qca8k_mdio_read(priv, port, regnum);
+
+ if (ret < 0)
+ return 0xffff;
+
+ return ret;
+}
+
+static int
+qca8k_setup_mdio_bus(struct qca8k_priv *priv)
+{
+ u32 internal_mdio_mask = 0, external_mdio_mask = 0, reg;
+ struct device_node *ports, *port;
+ int err;
+
+ ports = of_get_child_by_name(priv->dev->of_node, "ports");
+ if (!ports)
+ return -EINVAL;
+
+ for_each_available_child_of_node(ports, port) {
+ err = of_property_read_u32(port, "reg", &reg);
+ if (err)
+ return err;
+
+ if (!dsa_is_user_port(priv->ds, reg))
+ continue;
+
+ if (of_property_read_bool(port, "phy-handle"))
+ external_mdio_mask |= BIT(reg);
+ else
+ internal_mdio_mask |= BIT(reg);
+ }
+
+ if (!external_mdio_mask && !internal_mdio_mask) {
+ dev_err(priv->dev, "no PHYs are defined.\n");
+ return -EINVAL;
+ }
+
+ /* The QCA8K_MDIO_MASTER_EN Bit, which grants access to PHYs through
+ * the MDIO_MASTER register also _disconnects_ the external MDC
+ * passthrough to the internal PHYs. It's not possible to use both
+ * configurations at the same time!
+ *
+ * Because this came up during the review process:
+ * If the external mdio-bus driver is capable magically disabling
+ * the QCA8K_MDIO_MASTER_EN and mutex/spin-locking out the qca8k's
+ * accessors for the time being, it would be possible to pull this
+ * off.
+ */
+ if (!!external_mdio_mask && !!internal_mdio_mask) {
+ dev_err(priv->dev, "either internal or external mdio bus configuration is supported.\n");
+ return -EINVAL;
+ }
+
+ if (external_mdio_mask) {
+ /* Make sure to disable the internal mdio bus in cases
+ * a dt-overlay and driver reload changed the configuration
+ */
+
+ qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
+ QCA8K_MDIO_MASTER_EN);
+ return 0;
+ }
+
+ priv->ops.phy_read = qca8k_phy_read;
+ priv->ops.phy_write = qca8k_phy_write;
+ return 0;
+}
+
static int
qca8k_setup(struct dsa_switch *ds)
{
@@ -502,6 +651,10 @@ qca8k_setup(struct dsa_switch *ds)
if (IS_ERR(priv->regmap))
pr_warn("regmap initialization failed");
+ ret = qca8k_setup_mdio_bus(priv);
+ if (ret)
+ return ret;
+
/* Initialize CPU port pad mode (xMII type, delays...) */
phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn);
if (phy_mode < 0) {
@@ -624,22 +777,6 @@ qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy)
qca8k_port_set_status(priv, port, 1);
}
-static int
-qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum)
-{
- struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-
- return mdiobus_read(priv->bus, phy, regnum);
-}
-
-static int
-qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val)
-{
- struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-
- return mdiobus_write(priv->bus, phy, regnum, val);
-}
-
static void
qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data)
{
@@ -879,8 +1016,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
.setup = qca8k_setup,
.adjust_link = qca8k_adjust_link,
.get_strings = qca8k_get_strings,
- .phy_read = qca8k_phy_read,
- .phy_write = qca8k_phy_write,
.get_ethtool_stats = qca8k_get_ethtool_stats,
.get_sset_count = qca8k_get_sset_count,
.get_mac_eee = qca8k_get_mac_eee,
@@ -923,7 +1058,8 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
return -ENOMEM;
priv->ds->priv = priv;
- priv->ds->ops = &qca8k_switch_ops;
+ priv->ops = qca8k_switch_ops;
+ priv->ds->ops = &priv->ops;
mutex_init(&priv->reg_mutex);
dev_set_drvdata(&mdiodev->dev, priv);
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index d146e54c8a6c..249fd62268e5 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -49,6 +49,18 @@
#define QCA8K_MIB_FLUSH BIT(24)
#define QCA8K_MIB_CPU_KEEP BIT(20)
#define QCA8K_MIB_BUSY BIT(17)
+#define QCA8K_MDIO_MASTER_CTRL 0x3c
+#define QCA8K_MDIO_MASTER_BUSY BIT(31)
+#define QCA8K_MDIO_MASTER_EN BIT(30)
+#define QCA8K_MDIO_MASTER_READ BIT(27)
+#define QCA8K_MDIO_MASTER_WRITE 0
+#define QCA8K_MDIO_MASTER_SUP_PRE BIT(26)
+#define QCA8K_MDIO_MASTER_PHY_ADDR(x) ((x) << 21)
+#define QCA8K_MDIO_MASTER_REG_ADDR(x) ((x) << 16)
+#define QCA8K_MDIO_MASTER_DATA(x) (x)
+#define QCA8K_MDIO_MASTER_DATA_MASK GENMASK(15, 0)
+#define QCA8K_MDIO_MASTER_MAX_PORTS 5
+#define QCA8K_MDIO_MASTER_MAX_REG 32
#define QCA8K_GOL_MAC_ADDR0 0x60
#define QCA8K_GOL_MAC_ADDR1 0x64
#define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4)
@@ -169,6 +181,7 @@ struct qca8k_priv {
struct dsa_switch *ds;
struct mutex reg_mutex;
struct device *dev;
+ struct dsa_switch_ops ops;
};
struct qca8k_mib_desc {
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index 808abb6b3671..b15752267c8d 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -1521,7 +1521,7 @@ static void update_stats(int ioaddr, struct net_device *dev)
static void set_rx_mode(struct net_device *dev)
{
int ioaddr = dev->base_addr;
- short new_mode;
+ unsigned short new_mode;
if (dev->flags & IFF_PROMISC) {
if (corkscrew_debug > 3)
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 342ae08ec3c2..d60a86aa8aa8 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -153,8 +153,6 @@ static void dayna_block_input(struct net_device *dev, int count,
static void dayna_block_output(struct net_device *dev, int count,
const unsigned char *buf, int start_page);
-#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c))
-
/* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */
static void slow_sane_get_8390_hdr(struct net_device *dev,
struct e8390_pkt_hdr *hdr, int ring_page);
@@ -233,19 +231,26 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
static enum mac8390_access mac8390_testio(unsigned long membase)
{
- unsigned long outdata = 0xA5A0B5B0;
- unsigned long indata = 0x00000000;
+ u32 outdata = 0xA5A0B5B0;
+ u32 indata = 0;
+
/* Try writing 32 bits */
- memcpy_toio((void __iomem *)membase, &outdata, 4);
- /* Now compare them */
- if (memcmp_withio(&outdata, membase, 4) == 0)
+ nubus_writel(outdata, membase);
+ /* Now read it back */
+ indata = nubus_readl(membase);
+ if (outdata == indata)
return ACCESS_32;
+
+ outdata = 0xC5C0D5D0;
+ indata = 0;
+
/* Write 16 bit output */
word_memcpy_tocard(membase, &outdata, 4);
/* Now read it back */
word_memcpy_fromcard(&indata, membase, 4);
if (outdata == indata)
return ACCESS_16;
+
return ACCESS_UNKNOWN;
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 74550ccc7a20..e2ffb159cbe2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -186,11 +186,12 @@ static void aq_rx_checksum(struct aq_ring_s *self,
}
if (buff->is_ip_cso) {
__skb_incr_checksum_unnecessary(skb);
- if (buff->is_udp_cso || buff->is_tcp_cso)
- __skb_incr_checksum_unnecessary(skb);
} else {
skb->ip_summed = CHECKSUM_NONE;
}
+
+ if (buff->is_udp_cso || buff->is_tcp_cso)
+ __skb_incr_checksum_unnecessary(skb);
}
#define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0bb9d7b3a2b6..4c586ba4364b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1133,6 +1133,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
tpa_info = &rxr->rx_tpa[agg_id];
if (unlikely(cons != rxr->rx_next_cons)) {
+ netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n",
+ cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
return;
}
@@ -1585,15 +1587,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
cons = rxcmp->rx_cmp_opaque;
- rx_buf = &rxr->rx_buf_ring[cons];
- data = rx_buf->data;
- data_ptr = rx_buf->data_ptr;
if (unlikely(cons != rxr->rx_next_cons)) {
int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
+ netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
+ cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
return rc1;
}
+ rx_buf = &rxr->rx_buf_ring[cons];
+ data = rx_buf->data;
+ data_ptr = rx_buf->data_ptr;
prefetch(data_ptr);
misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1);
@@ -1610,11 +1614,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
rx_buf->data = NULL;
if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) {
+ u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2);
+
bnxt_reuse_rx_data(rxr, cons, data);
if (agg_bufs)
bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs);
rc = -EIO;
+ if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
+ netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
+ bnxt_sched_reset(bp, rxr);
+ }
goto next_rx;
}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 328373e0578f..060a6f386104 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -4283,7 +4283,7 @@ static void tg3_power_down(struct tg3 *tp)
pci_set_power_state(tp->pdev, PCI_D3hot);
}
-static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex)
+static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex)
{
switch (val & MII_TG3_AUX_STAT_SPDMASK) {
case MII_TG3_AUX_STAT_10HALF:
@@ -4787,7 +4787,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, bool force_reset)
bool current_link_up;
u32 bmsr, val;
u32 lcl_adv, rmt_adv;
- u16 current_speed;
+ u32 current_speed;
u8 current_duplex;
int i, err;
@@ -5719,7 +5719,7 @@ out:
static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset)
{
u32 orig_pause_cfg;
- u16 orig_active_speed;
+ u32 orig_active_speed;
u8 orig_active_duplex;
u32 mac_status;
bool current_link_up;
@@ -5823,7 +5823,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset)
{
int err = 0;
u32 bmsr, bmcr;
- u16 current_speed = SPEED_UNKNOWN;
+ u32 current_speed = SPEED_UNKNOWN;
u8 current_duplex = DUPLEX_UNKNOWN;
bool current_link_up = false;
u32 local_adv, remote_adv, sgsr;
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index a772a33b685c..6953d0546acb 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2873,7 +2873,7 @@ struct tg3_tx_ring_info {
struct tg3_link_config {
/* Describes what we're trying to get. */
u32 advertising;
- u16 speed;
+ u32 speed;
u8 duplex;
u8 autoneg;
u8 flowctrl;
@@ -2882,7 +2882,7 @@ struct tg3_link_config {
u8 active_flowctrl;
u8 active_duplex;
- u16 active_speed;
+ u32 active_speed;
u32 rmt_adv;
};
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ad099fd01b45..3da2795e2486 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -898,7 +898,9 @@ static void macb_tx_interrupt(struct macb_queue *queue)
/* First, update TX stats if needed */
if (skb) {
- if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP) &&
+ gem_ptp_do_txstamp(queue, skb, desc) == 0) {
/* skb now belongs to timestamp buffer
* and will be removed later
*/
@@ -3370,14 +3372,20 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
*hclk = devm_clk_get(&pdev->dev, "hclk");
}
- if (IS_ERR(*pclk)) {
+ if (IS_ERR_OR_NULL(*pclk)) {
err = PTR_ERR(*pclk);
+ if (!err)
+ err = -ENODEV;
+
dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err);
return err;
}
- if (IS_ERR(*hclk)) {
+ if (IS_ERR_OR_NULL(*hclk)) {
err = PTR_ERR(*hclk);
+ if (!err)
+ err = -ENODEV;
+
dev_err(&pdev->dev, "failed to get hclk (%u)\n", err);
return err;
}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index aa2be4807191..28eac9056211 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1328,10 +1328,11 @@ int nicvf_stop(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
union nic_mbx mbx = {};
- cancel_delayed_work_sync(&nic->link_change_work);
-
/* wait till all queued set_rx_mode tasks completes */
- drain_workqueue(nic->nicvf_rx_mode_wq);
+ if (nic->nicvf_rx_mode_wq) {
+ cancel_delayed_work_sync(&nic->link_change_work);
+ drain_workqueue(nic->nicvf_rx_mode_wq);
+ }
mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
nicvf_send_msg_to_pf(nic, &mbx);
@@ -1452,7 +1453,8 @@ int nicvf_open(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
/* wait till all queued set_rx_mode tasks completes if any */
- drain_workqueue(nic->nicvf_rx_mode_wq);
+ if (nic->nicvf_rx_mode_wq)
+ drain_workqueue(nic->nicvf_rx_mode_wq);
netif_carrier_off(netdev);
@@ -1550,10 +1552,12 @@ int nicvf_open(struct net_device *netdev)
/* Send VF config done msg to PF */
nicvf_send_cfg_done(nic);
- INIT_DELAYED_WORK(&nic->link_change_work,
- nicvf_link_status_check_task);
- queue_delayed_work(nic->nicvf_rx_mode_wq,
- &nic->link_change_work, 0);
+ if (nic->nicvf_rx_mode_wq) {
+ INIT_DELAYED_WORK(&nic->link_change_work,
+ nicvf_link_status_check_task);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 0);
+ }
return 0;
cleanup:
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 5b4d3badcb73..e246f9733bb8 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -105,20 +105,19 @@ static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
/* Check if page can be recycled */
if (page) {
ref_count = page_ref_count(page);
- /* Check if this page has been used once i.e 'put_page'
- * called after packet transmission i.e internal ref_count
- * and page's ref_count are equal i.e page can be recycled.
+ /* This page can be recycled if internal ref_count and page's
+ * ref_count are equal, indicating that the page has been used
+ * once for packet transmission. For non-XDP mode, internal
+ * ref_count is always '1'.
*/
- if (rbdr->is_xdp && (ref_count == pgcache->ref_count))
- pgcache->ref_count--;
- else
- page = NULL;
-
- /* In non-XDP mode, page's ref_count needs to be '1' for it
- * to be recycled.
- */
- if (!rbdr->is_xdp && (ref_count != 1))
+ if (rbdr->is_xdp) {
+ if (ref_count == pgcache->ref_count)
+ pgcache->ref_count--;
+ else
+ page = NULL;
+ } else if (ref_count != 1) {
page = NULL;
+ }
}
if (!page) {
@@ -365,11 +364,10 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
while (head < rbdr->pgcnt) {
pgcache = &rbdr->pgcache[head];
if (pgcache->page && page_ref_count(pgcache->page) != 0) {
- if (!rbdr->is_xdp) {
- put_page(pgcache->page);
- continue;
+ if (rbdr->is_xdp) {
+ page_ref_sub(pgcache->page,
+ pgcache->ref_count - 1);
}
- page_ref_sub(pgcache->page, pgcache->ref_count - 1);
put_page(pgcache->page);
}
head++;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 3130b43bba52..02959035ed3f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2620,7 +2620,7 @@ static inline struct port_info *ethqset2pinfo(struct adapter *adap, int qset)
}
/* should never happen! */
- BUG_ON(1);
+ BUG();
return NULL;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 88773ca58e6b..b3da81e90132 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -476,7 +476,7 @@ static inline int get_buf_size(struct adapter *adapter,
break;
default:
- BUG_ON(1);
+ BUG();
}
return buf_size;
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
index 74849be5f004..e2919005ead3 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
@@ -354,7 +354,10 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
ppmax = max;
/* pool size must be multiple of unsigned long */
- bmap = BITS_TO_LONGS(ppmax);
+ bmap = ppmax / BITS_PER_TYPE(unsigned long);
+ if (!bmap)
+ return NULL;
+
ppmax = (bmap * sizeof(unsigned long)) << 3;
alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
@@ -402,6 +405,10 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
if (reserve_factor) {
ppmax_pool = ppmax / reserve_factor;
pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
+ if (!pool) {
+ ppmax_pool = 0;
+ reserve_factor = 0;
+ }
pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
ndev->name, ppmax, ppmax_pool, pool_index_max);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 2ba49e959c3f..dc339dc1adb2 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -815,6 +815,14 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
*/
queue_mapping = skb_get_queue_mapping(skb);
fq = &priv->fq[queue_mapping];
+
+ fd_len = dpaa2_fd_get_len(&fd);
+ nq = netdev_get_tx_queue(net_dev, queue_mapping);
+ netdev_tx_sent_queue(nq, fd_len);
+
+ /* Everything that happens after this enqueues might race with
+ * the Tx confirmation callback for this frame
+ */
for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
err = priv->enqueue(priv, fq, &fd, 0);
if (err != -EBUSY)
@@ -825,13 +833,10 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
percpu_stats->tx_errors++;
/* Clean up everything, including freeing the skb */
free_tx_fd(priv, fq, &fd, false);
+ netdev_tx_completed_queue(nq, 1, fd_len);
} else {
- fd_len = dpaa2_fd_get_len(&fd);
percpu_stats->tx_packets++;
percpu_stats->tx_bytes += fd_len;
-
- nq = netdev_get_tx_queue(net_dev, queue_mapping);
- netdev_tx_sent_queue(nq, fd_len);
}
return NETDEV_TX_OK;
@@ -1817,7 +1822,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
dpaa2_fd_set_format(&fd, dpaa2_fd_single);
dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA);
- fq = &priv->fq[smp_processor_id()];
+ fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
err = priv->enqueue(priv, fq, &fd, 0);
if (err != -EBUSY)
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index 79d03f8ee7b1..c7fa97a7e1f4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -150,7 +150,6 @@ out_buffer_fail:
/* free desc along with its attached buffer */
static void hnae_free_desc(struct hnae_ring *ring)
{
- hnae_free_buffers(ring);
dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr,
ring->desc_num * sizeof(ring->desc[0]),
ring_to_dma_dir(ring));
@@ -183,6 +182,9 @@ static int hnae_alloc_desc(struct hnae_ring *ring)
/* fini ring, also free the buffer for the ring */
static void hnae_fini_ring(struct hnae_ring *ring)
{
+ if (is_rx_ring(ring))
+ hnae_free_buffers(ring);
+
hnae_free_desc(ring);
kfree(ring->desc_cb);
ring->desc_cb = NULL;
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 08a750fb60c4..d6fb83437230 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -357,7 +357,7 @@ struct hnae_buf_ops {
};
struct hnae_queue {
- void __iomem *io_base;
+ u8 __iomem *io_base;
phys_addr_t phy_base;
struct hnae_ae_dev *dev; /* the device who use this queue */
struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index a97228c93831..6c0507921623 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -370,7 +370,7 @@ int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn)
static void hns_mac_param_get(struct mac_params *param,
struct hns_mac_cb *mac_cb)
{
- param->vaddr = (void *)mac_cb->vaddr;
+ param->vaddr = mac_cb->vaddr;
param->mac_mode = hns_get_enet_interface(mac_cb);
ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr);
param->mac_id = mac_cb->mac_id;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index fbc75341bef7..22589799f1a5 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -187,7 +187,7 @@ struct mac_statistics {
/*mac para struct ,mac get param from nic or dsaf when initialize*/
struct mac_params {
char addr[ETH_ALEN];
- void *vaddr; /*virtual address*/
+ u8 __iomem *vaddr; /*virtual address*/
struct device *dev;
u8 mac_id;
/**< Ethernet operation mode (MAC-PHY interface and speed) */
@@ -402,7 +402,7 @@ struct mac_driver {
enum mac_mode mac_mode;
u8 mac_id;
struct hns_mac_cb *mac_cb;
- void __iomem *io_base;
+ u8 __iomem *io_base;
unsigned int mac_en_flg;/*you'd better don't enable mac twice*/
unsigned int virt_dev_num;
struct device *dev;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index ac55db065f16..61eea6ac846f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1602,8 +1602,6 @@ static void hns_dsaf_set_mac_key(
DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,
DSAF_TBL_TCAM_KEY_PORT_S, port);
-
- mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan);
}
/**
@@ -1663,8 +1661,8 @@ int hns_dsaf_set_mac_uc_entry(
/* default config dvc to 0 */
mac_data.tbl_ucast_dvc = 0;
mac_data.tbl_ucast_out_port = mac_entry->port_num;
- tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
- tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+ tcam_data.tbl_tcam_data_high = mac_key.high.val;
+ tcam_data.tbl_tcam_data_low = mac_key.low.val;
hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data);
@@ -1786,9 +1784,6 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
0xff,
mc_mask);
- mask_key.high.val = le32_to_cpu(mask_key.high.val);
- mask_key.low.val = le32_to_cpu(mask_key.low.val);
-
pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
}
@@ -1840,8 +1835,8 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
dsaf_dev->ae_dev.name, mac_key.high.val,
mac_key.low.val, entry_index);
- tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
- tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+ tcam_data.tbl_tcam_data_high = mac_key.high.val;
+ tcam_data.tbl_tcam_data_low = mac_key.low.val;
/* config mc entry with mask */
hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data,
@@ -1956,9 +1951,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
/* config key mask */
hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_mask);
- mask_key.high.val = le32_to_cpu(mask_key.high.val);
- mask_key.low.val = le32_to_cpu(mask_key.low.val);
-
pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
}
@@ -2012,8 +2004,8 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
soft_mac_entry += entry_index;
soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX;
} else { /* not zero, just del port, update */
- tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
- tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+ tcam_data.tbl_tcam_data_high = mac_key.high.val;
+ tcam_data.tbl_tcam_data_low = mac_key.low.val;
hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
&tcam_data,
@@ -2750,6 +2742,17 @@ int hns_dsaf_get_regs_count(void)
return DSAF_DUMP_REGS_NUM;
}
+static int hns_dsaf_get_port_id(u8 port)
+{
+ if (port < DSAF_SERVICE_NW_NUM)
+ return port;
+
+ if (port >= DSAF_BASE_INNER_PORT_NUM)
+ return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM;
+
+ return -EINVAL;
+}
+
static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port)
{
struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80};
@@ -2815,23 +2818,33 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port)
memset(&temp_key, 0x0, sizeof(temp_key));
mask_entry.addr[0] = 0x01;
hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id,
- port, mask_entry.addr);
+ 0xf, mask_entry.addr);
tbl_tcam_mcast.tbl_mcast_item_vld = 1;
tbl_tcam_mcast.tbl_mcast_old_en = 0;
- if (port < DSAF_SERVICE_NW_NUM) {
- mskid = port;
- } else if (port >= DSAF_BASE_INNER_PORT_NUM) {
- mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM;
- } else {
+ /* set MAC port to handle multicast */
+ mskid = hns_dsaf_get_port_id(port);
+ if (mskid == -EINVAL) {
dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n",
dsaf_dev->ae_dev.name, port,
mask_key.high.val, mask_key.low.val);
return;
}
+ dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32],
+ mskid % 32, 1);
+ /* set pool bit map to handle multicast */
+ mskid = hns_dsaf_get_port_id(port_num);
+ if (mskid == -EINVAL) {
+ dev_err(dsaf_dev->dev,
+ "%s, pool bit map pnum(%d)error,key(%#x:%#x)\n",
+ dsaf_dev->ae_dev.name, port_num,
+ mask_key.high.val, mask_key.low.val);
+ return;
+ }
dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32],
mskid % 32, 1);
+
memcpy(&temp_key, &mask_key, sizeof(mask_key));
hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc,
(struct dsaf_tbl_tcam_data *)(&mask_key),
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 0e1cd99831a6..76cc8887e1a8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -467,4 +467,6 @@ int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev,
u8 mac_id, u8 port_num);
int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port);
+int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
+
#endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 16294cd3c954..19b94879691f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -670,7 +670,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en)
dsaf_set_field(origin, 1ull << 10, 10, en);
dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin);
} else {
- u8 *base_addr = (u8 *)mac_cb->serdes_vaddr +
+ u8 __iomem *base_addr = mac_cb->serdes_vaddr +
(mac_cb->mac_id <= 3 ? 0x00280000 : 0x00200000);
dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, en);
}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index 3d07c8a7639d..17c019106e6e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -61,7 +61,7 @@ void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb,
}
}
-static void __iomem *
+static u8 __iomem *
hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common)
{
return ppe_common->dsaf_dev->ppe_base + PPE_COMMON_REG_OFFSET;
@@ -111,8 +111,8 @@ hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index)
dsaf_dev->ppe_common[comm_index] = NULL;
}
-static void __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common,
- int ppe_idx)
+static u8 __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common,
+ int ppe_idx)
{
return ppe_common->dsaf_dev->ppe_base + ppe_idx * PPE_REG_OFFSET;
}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index f670e63a5a01..110c6e8222c7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
@@ -80,7 +80,7 @@ struct hns_ppe_cb {
struct hns_ppe_hw_stats hw_stats;
u8 index; /* index in a ppe common device */
- void __iomem *io_base;
+ u8 __iomem *io_base;
int virq;
u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */
u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */
@@ -89,7 +89,7 @@ struct hns_ppe_cb {
struct ppe_common_cb {
struct device *dev;
struct dsaf_device *dsaf_dev;
- void __iomem *io_base;
+ u8 __iomem *io_base;
enum ppe_common_mode ppe_mode;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 6bf346c11b25..ac3518ca4d7b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -458,7 +458,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type)
mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT;
} else {
ring = &q->tx_ring;
- ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base +
+ ring->io_base = ring_pair_cb->q.io_base +
HNS_RCB_TX_REG_OFFSET;
irq_idx = HNS_RCB_IRQ_IDX_TX;
mdnum_ppkt = is_ver1 ? HNS_RCB_RING_MAX_TXBD_PER_PKT :
@@ -764,7 +764,7 @@ static int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev)
}
}
-static void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common)
+static u8 __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common)
{
struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index b9733b0b8482..b9e7f11f0896 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -1018,7 +1018,7 @@
#define XGMAC_PAUSE_CTL_RSP_MODE_B 2
#define XGMAC_PAUSE_CTL_TX_XOFF_B 3
-static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value)
+static inline void dsaf_write_reg(u8 __iomem *base, u32 reg, u32 value)
{
writel(value, base + reg);
}
@@ -1053,7 +1053,7 @@ static inline int dsaf_read_syscon(struct regmap *base, u32 reg, u32 *val)
#define dsaf_set_bit(origin, shift, val) \
dsaf_set_field((origin), (1ull << (shift)), (shift), (val))
-static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask,
+static inline void dsaf_set_reg_field(u8 __iomem *base, u32 reg, u32 mask,
u32 shift, u32 val)
{
u32 origin = dsaf_read_reg(base, reg);
@@ -1073,7 +1073,7 @@ static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask,
#define dsaf_get_bit(origin, shift) \
dsaf_get_field((origin), (1ull << (shift)), (shift))
-static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask,
+static inline u32 dsaf_get_reg_field(u8 __iomem *base, u32 reg, u32 mask,
u32 shift)
{
u32 origin;
@@ -1089,11 +1089,11 @@ static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask,
dsaf_get_reg_field((dev)->io_base, (reg), (1ull << (bit)), (bit))
#define dsaf_write_b(addr, data)\
- writeb((data), (__iomem unsigned char *)(addr))
+ writeb((data), (__iomem u8 *)(addr))
#define dsaf_read_b(addr)\
- readb((__iomem unsigned char *)(addr))
+ readb((__iomem u8 *)(addr))
#define hns_mac_reg_read64(drv, offset) \
- readq((__iomem void *)(((u8 *)(drv)->io_base + 0xc00 + (offset))))
+ readq((__iomem void *)(((drv)->io_base + 0xc00 + (offset))))
#endif /* _DSAF_REG_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index ba4316910dea..a60f207768fc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -129,7 +129,7 @@ static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv)
dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0);
dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1);
dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0);
- dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val);
+ dsaf_write_dev(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val);
}
/**
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 60e7d7ae3787..4cd86ba1f050 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -29,9 +29,6 @@
#define SERVICE_TIMER_HZ (1 * HZ)
-#define NIC_TX_CLEAN_MAX_NUM 256
-#define NIC_RX_CLEAN_MAX_NUM 64
-
#define RCB_IRQ_NOT_INITED 0
#define RCB_IRQ_INITED 1
#define HNS_BUFFER_SIZE_2048 2048
@@ -376,8 +373,6 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev,
wmb(); /* commit all data before submit */
assert(skb->queue_mapping < priv->ae_handle->q_num);
hnae_queue_xmit(priv->ae_handle->qs[skb->queue_mapping], buf_num);
- ring->stats.tx_pkts++;
- ring->stats.tx_bytes += skb->len;
return NETDEV_TX_OK;
@@ -999,6 +994,9 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
/* issue prefetch for next Tx descriptor */
prefetch(&ring->desc_cb[ring->next_to_clean]);
}
+ /* update tx ring statistics. */
+ ring->stats.tx_pkts += pkts;
+ ring->stats.tx_bytes += bytes;
NETIF_TX_UNLOCK(ring);
@@ -2152,7 +2150,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv)
hns_nic_tx_fini_pro_v2;
netif_napi_add(priv->netdev, &rd->napi,
- hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM);
+ hns_nic_common_poll, NAPI_POLL_WEIGHT);
rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED;
}
for (i = h->q_num; i < h->q_num * 2; i++) {
@@ -2165,7 +2163,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv)
hns_nic_rx_fini_pro_v2;
netif_napi_add(priv->netdev, &rd->napi,
- hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM);
+ hns_nic_common_poll, NAPI_POLL_WEIGHT);
rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 1c1f17ec6be2..162cb9afa0e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -22,6 +22,7 @@
#include "hns3_enet.h"
#define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift)))
+#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE)
static void hns3_clear_all_ring(struct hnae3_handle *h);
static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h);
@@ -1079,7 +1080,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
desc_cb->length = size;
- frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET;
+ frag_buf_num = hns3_tx_bd_count(size);
sizeoflast = size & HNS3_TX_LAST_SIZE_M;
sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE;
@@ -1124,14 +1125,13 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum,
int i;
size = skb_headlen(skb);
- buf_num = (size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET;
+ buf_num = hns3_tx_bd_count(size);
frag_num = skb_shinfo(skb)->nr_frags;
for (i = 0; i < frag_num; i++) {
frag = &skb_shinfo(skb)->frags[i];
size = skb_frag_size(frag);
- bdnum_for_frag = (size + HNS3_MAX_BD_SIZE - 1) >>
- HNS3_MAX_BD_SIZE_OFFSET;
+ bdnum_for_frag = hns3_tx_bd_count(size);
if (unlikely(bdnum_for_frag > HNS3_MAX_BD_PER_FRAG))
return -ENOMEM;
@@ -1139,8 +1139,7 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum,
}
if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) {
- buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) >>
- HNS3_MAX_BD_SIZE_OFFSET;
+ buf_num = hns3_tx_bd_count(skb->len);
if (ring_space(ring) < buf_num)
return -EBUSY;
/* manual split the send packet */
@@ -1169,7 +1168,7 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum,
buf_num = skb_shinfo(skb)->nr_frags + 1;
if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) {
- buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+ buf_num = hns3_tx_bd_count(skb->len);
if (ring_space(ring) < buf_num)
return -EBUSY;
/* manual split the send packet */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 1db0bd41d209..75669cd0c311 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -193,7 +193,6 @@ enum hns3_nic_state {
#define HNS3_VECTOR_INITED 1
#define HNS3_MAX_BD_SIZE 65535
-#define HNS3_MAX_BD_SIZE_OFFSET 16
#define HNS3_MAX_BD_PER_FRAG 8
#define HNS3_MAX_BD_PER_PKT MAX_SKB_FRAGS
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index fffe8c1c45d3..0fb61d440d3b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -3,7 +3,7 @@
# Makefile for the HISILICON network device drivers.
#
-ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_HNS3_HCLGE) += hclge.o
hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge_debugfs.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
index fb93bbd35845..6193f8fa7cf3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
@@ -3,7 +3,7 @@
# Makefile for the HISILICON network device drivers.
#
-ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o
hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o \ No newline at end of file
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index baf5cc251f32..8b8a7d00e8e0 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -39,7 +39,7 @@ struct hns_mdio_sc_reg {
};
struct hns_mdio_device {
- void *vbase; /* mdio reg base address */
+ u8 __iomem *vbase; /* mdio reg base address */
struct regmap *subctrl_vbase;
struct hns_mdio_sc_reg sc_reg;
};
@@ -96,21 +96,17 @@ enum mdio_c45_op_seq {
#define MDIO_SC_CLK_ST 0x531C
#define MDIO_SC_RESET_ST 0x5A1C
-static void mdio_write_reg(void *base, u32 reg, u32 value)
+static void mdio_write_reg(u8 __iomem *base, u32 reg, u32 value)
{
- u8 __iomem *reg_addr = (u8 __iomem *)base;
-
- writel_relaxed(value, reg_addr + reg);
+ writel_relaxed(value, base + reg);
}
#define MDIO_WRITE_REG(a, reg, value) \
mdio_write_reg((a)->vbase, (reg), (value))
-static u32 mdio_read_reg(void *base, u32 reg)
+static u32 mdio_read_reg(u8 __iomem *base, u32 reg)
{
- u8 __iomem *reg_addr = (u8 __iomem *)base;
-
- return readl_relaxed(reg_addr + reg);
+ return readl_relaxed(base + reg);
}
#define mdio_set_field(origin, mask, shift, val) \
@@ -121,7 +117,7 @@ static u32 mdio_read_reg(void *base, u32 reg)
#define mdio_get_field(origin, mask, shift) (((origin) >> (shift)) & (mask))
-static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift,
+static void mdio_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift,
u32 val)
{
u32 origin = mdio_read_reg(base, reg);
@@ -133,7 +129,7 @@ static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift,
#define MDIO_SET_REG_FIELD(dev, reg, mask, shift, val) \
mdio_set_reg_field((dev)->vbase, (reg), (mask), (shift), (val))
-static u32 mdio_get_reg_field(void *base, u32 reg, u32 mask, u32 shift)
+static u32 mdio_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift)
{
u32 origin;
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 3baabdc89726..90b62c1412c8 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -3160,6 +3160,7 @@ static ssize_t ehea_probe_port(struct device *dev,
if (ehea_add_adapter_mr(adapter)) {
pr_err("creating MR failed\n");
+ of_node_put(eth_dn);
return -EIO;
}
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5ecbb1adcf3b..51cfe95f3e24 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1885,6 +1885,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
*/
adapter->state = VNIC_PROBED;
+ reinit_completion(&adapter->init_done);
rc = init_crq_queue(adapter);
if (rc) {
netdev_err(adapter->netdev,
@@ -4625,7 +4626,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
old_num_rx_queues = adapter->req_rx_queues;
old_num_tx_queues = adapter->req_tx_queues;
- init_completion(&adapter->init_done);
+ reinit_completion(&adapter->init_done);
adapter->init_done_rc = 0;
ibmvnic_send_crq_init(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
@@ -4680,7 +4681,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
adapter->from_passive_init = false;
- init_completion(&adapter->init_done);
adapter->init_done_rc = 0;
ibmvnic_send_crq_init(adapter);
if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
@@ -4759,6 +4759,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+ init_completion(&adapter->init_done);
adapter->resetting = false;
adapter->mac_change_pending = false;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 5a0419421511..ecef949f3baa 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -41,6 +41,8 @@ static int __init fm10k_init_module(void)
/* create driver workqueue */
fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
fm10k_driver_name);
+ if (!fm10k_workqueue)
+ return -ENOMEM;
fm10k_dbg_init();
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d684998ba2b0..d3cc3427caad 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -790,6 +790,8 @@ struct i40e_vsi {
/* VSI specific handlers */
irqreturn_t (*irq_handler)(int irq, void *data);
+
+ unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
} ____cacheline_internodealigned_in_smp;
struct i40e_netdev_priv {
@@ -1096,20 +1098,6 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
return !!vsi->xdp_prog;
}
-static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
-{
- bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
- int qid = ring->queue_index;
-
- if (ring_is_xdp(ring))
- qid -= ring->vsi->alloc_queue_pairs;
-
- if (!xdp_on)
- return NULL;
-
- return xdp_get_umem_from_qid(ring->vsi->netdev, qid);
-}
-
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 4c885801fa26..7874d0ec7fb0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2573,8 +2573,7 @@ static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
return -EOPNOTSUPP;
/* only magic packet is supported */
- if (wol->wolopts && (wol->wolopts != WAKE_MAGIC)
- | (wol->wolopts != WAKE_FILTER))
+ if (wol->wolopts & ~WAKE_MAGIC)
return -EOPNOTSUPP;
/* is this a new value? */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index da62218eb70a..b1c265012c8a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3064,6 +3064,26 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
}
/**
+ * i40e_xsk_umem - Retrieve the AF_XDP ZC if XDP and ZC is enabled
+ * @ring: The Tx or Rx ring
+ *
+ * Returns the UMEM or NULL.
+ **/
+static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
+{
+ bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
+ int qid = ring->queue_index;
+
+ if (ring_is_xdp(ring))
+ qid -= ring->vsi->alloc_queue_pairs;
+
+ if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps))
+ return NULL;
+
+ return xdp_get_umem_from_qid(ring->vsi->netdev, qid);
+}
+
+/**
* i40e_configure_tx_ring - Configure a transmit ring context and rest
* @ring: The Tx ring to configure
*
@@ -10064,6 +10084,12 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
hash_init(vsi->mac_filter_hash);
vsi->irqs_ready = false;
+ if (type == I40E_VSI_MAIN) {
+ vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL);
+ if (!vsi->af_xdp_zc_qps)
+ goto err_rings;
+ }
+
ret = i40e_set_num_rings_in_vsi(vsi);
if (ret)
goto err_rings;
@@ -10082,6 +10108,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
goto unlock_pf;
err_rings:
+ bitmap_free(vsi->af_xdp_zc_qps);
pf->next_vsi = i - 1;
kfree(vsi);
unlock_pf:
@@ -10162,6 +10189,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
+ bitmap_free(vsi->af_xdp_zc_qps);
i40e_vsi_free_arrays(vsi, true);
i40e_clear_rss_config_user(vsi);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 5fb4353c742b..31575c0bb884 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -146,12 +146,13 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
- struct timespec64 now;
+ struct timespec64 now, then;
+ then = ns_to_timespec64(delta);
mutex_lock(&pf->tmreg_lock);
i40e_ptp_read(pf, &now, NULL);
- timespec64_add_ns(&now, delta);
+ now = timespec64_add(now, then);
i40e_ptp_write(pf, (const struct timespec64 *)&now);
mutex_unlock(&pf->tmreg_lock);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index b5c182e688e3..1b17486543ac 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -102,6 +102,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
if (err)
return err;
+ set_bit(qid, vsi->af_xdp_zc_qps);
+
if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);
if (if_running) {
@@ -148,6 +150,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
return err;
}
+ clear_bit(qid, vsi->af_xdp_zc_qps);
i40e_xsk_umem_dma_unmap(vsi, umem);
if (if_running) {
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 01fcfc6f3415..d2e2c50ce257 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -194,6 +194,8 @@
/* enable link status from external LINK_0 and LINK_1 pins */
#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */
#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */
+#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */
+#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */
#define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */
#define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */
#define E1000_CTRL_RST 0x04000000 /* Global reset */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 69b230c53fed..3269d8e94744 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8740,9 +8740,7 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, rctl, status;
u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
-#ifdef CONFIG_PM
- int retval = 0;
-#endif
+ bool wake;
rtnl_lock();
netif_device_detach(netdev);
@@ -8755,14 +8753,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
igb_clear_interrupt_scheme(adapter);
rtnl_unlock();
-#ifdef CONFIG_PM
- if (!runtime) {
- retval = pci_save_state(pdev);
- if (retval)
- return retval;
- }
-#endif
-
status = rd32(E1000_STATUS);
if (status & E1000_STATUS_LU)
wufc &= ~E1000_WUFC_LNKC;
@@ -8779,10 +8769,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
}
ctrl = rd32(E1000_CTRL);
- /* advertise wake from D3Cold */
- #define E1000_CTRL_ADVD3WUC 0x00100000
- /* phy power management enable */
- #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
ctrl |= E1000_CTRL_ADVD3WUC;
wr32(E1000_CTRL, ctrl);
@@ -8796,12 +8782,15 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
wr32(E1000_WUFC, 0);
}
- *enable_wake = wufc || adapter->en_mng_pt;
- if (!*enable_wake)
+ wake = wufc || adapter->en_mng_pt;
+ if (!wake)
igb_power_down_link(adapter);
else
igb_power_up_link(adapter);
+ if (enable_wake)
+ *enable_wake = wake;
+
/* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
@@ -8844,22 +8833,7 @@ static void igb_deliver_wake_packet(struct net_device *netdev)
static int __maybe_unused igb_suspend(struct device *dev)
{
- int retval;
- bool wake;
- struct pci_dev *pdev = to_pci_dev(dev);
-
- retval = __igb_shutdown(pdev, &wake, 0);
- if (retval)
- return retval;
-
- if (wake) {
- pci_prepare_to_sleep(pdev);
- } else {
- pci_wake_from_d3(pdev, false);
- pci_set_power_state(pdev, PCI_D3hot);
- }
-
- return 0;
+ return __igb_shutdown(to_pci_dev(dev), NULL, 0);
}
static int __maybe_unused igb_resume(struct device *dev)
@@ -8930,22 +8904,7 @@ static int __maybe_unused igb_runtime_idle(struct device *dev)
static int __maybe_unused igb_runtime_suspend(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- int retval;
- bool wake;
-
- retval = __igb_shutdown(pdev, &wake, 1);
- if (retval)
- return retval;
-
- if (wake) {
- pci_prepare_to_sleep(pdev);
- } else {
- pci_wake_from_d3(pdev, false);
- pci_set_power_state(pdev, PCI_D3hot);
- }
-
- return 0;
+ return __igb_shutdown(to_pci_dev(dev), NULL, 1);
}
static int __maybe_unused igb_runtime_resume(struct device *dev)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index cc4907f9ff02..2fb97967961c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -905,13 +905,12 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
struct pci_dev *pdev = adapter->pdev;
struct device *dev = &adapter->netdev->dev;
struct mii_bus *bus;
+ int err = -ENODEV;
- adapter->mii_bus = devm_mdiobus_alloc(dev);
- if (!adapter->mii_bus)
+ bus = devm_mdiobus_alloc(dev);
+ if (!bus)
return -ENOMEM;
- bus = adapter->mii_bus;
-
switch (hw->device_id) {
/* C3000 SoCs */
case IXGBE_DEV_ID_X550EM_A_KR:
@@ -949,12 +948,15 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
*/
hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22;
- return mdiobus_register(bus);
+ err = mdiobus_register(bus);
+ if (!err) {
+ adapter->mii_bus = bus;
+ return 0;
+ }
ixgbe_no_mii_bus:
devm_mdiobus_free(dev, bus);
- adapter->mii_bus = NULL;
- return -ENODEV;
+ return err;
}
/**
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 122927f3a600..d5e5afbdca6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -96,9 +96,6 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
if (!eproto)
return -EINVAL;
- if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet))
- return -EOPNOTSUPP;
-
err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index eac245a93f91..4ab0d030b544 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -122,7 +122,9 @@ out:
return err;
}
-/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */
+/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B])
+ * minimum speed value is 40Gbps
+ */
static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
{
u32 speed;
@@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
int err;
err = mlx5e_port_linkspeed(priv->mdev, &speed);
- if (err) {
- mlx5_core_warn(priv->mdev, "cannot get port speed\n");
- return 0;
- }
+ if (err)
+ speed = SPEED_40000;
+ speed = max_t(u32, speed, SPEED_40000);
xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
@@ -142,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
}
static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
- u32 xoff, unsigned int mtu)
+ u32 xoff, unsigned int max_mtu)
{
int i;
@@ -154,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
}
if (port_buffer->buffer[i].size <
- (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
+ (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT)))
return -ENOMEM;
port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
- port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu;
+ port_buffer->buffer[i].xon =
+ port_buffer->buffer[i].xoff - max_mtu;
}
return 0;
@@ -166,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
/**
* update_buffer_lossy()
- * mtu: device's MTU
+ * max_mtu: netdev's max_mtu
* pfc_en: <input> current pfc configuration
* buffer: <input> current prio to buffer mapping
* xoff: <input> xoff value
@@ -183,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
* Return 0 if no error.
* Set change to true if buffer configuration is modified.
*/
-static int update_buffer_lossy(unsigned int mtu,
+static int update_buffer_lossy(unsigned int max_mtu,
u8 pfc_en, u8 *buffer, u32 xoff,
struct mlx5e_port_buffer *port_buffer,
bool *change)
@@ -220,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu,
}
if (changed) {
- err = update_xoff_threshold(port_buffer, xoff, mtu);
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu);
if (err)
return err;
@@ -230,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu,
return 0;
}
+#define MINIMUM_MAX_MTU 9216
int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
u32 change, unsigned int mtu,
struct ieee_pfc *pfc,
@@ -241,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
bool update_prio2buffer = false;
u8 buffer[MLX5E_MAX_PRIORITY];
bool update_buffer = false;
+ unsigned int max_mtu;
u32 total_used = 0;
u8 curr_pfc_en;
int err;
int i;
mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
err = mlx5e_port_query_buffer(priv, &port_buffer);
if (err)
@@ -254,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
if (err)
return err;
}
@@ -264,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff,
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff,
&port_buffer, &update_buffer);
if (err)
return err;
@@ -276,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff,
- &port_buffer, &update_buffer);
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer,
+ xoff, &port_buffer, &update_buffer);
if (err)
return err;
}
@@ -301,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
return -EINVAL;
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
if (err)
return err;
}
@@ -309,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
/* Need to update buffer configuration if xoff value is changed */
if (!update_buffer && xoff != priv->dcbx.xoff) {
update_buffer = true;
- err = update_xoff_threshold(&port_buffer, xoff, mtu);
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 3078491cc0d0..1539cf3de5dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev,
if (err)
return err;
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
return 0;
}
@@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev,
void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
struct mlx5e_tir *tir)
{
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
mlx5_core_destroy_tir(mdev, tir->tirn);
list_del(&tir->list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
}
static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
@@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
}
INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+ mutex_init(&mdev->mlx5e_res.td.list_lock);
return 0;
@@ -141,15 +146,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_tir *tir;
- int err = -ENOMEM;
+ int err = 0;
u32 tirn = 0;
int inlen;
void *in;
inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
+ if (!in) {
+ err = -ENOMEM;
goto out;
+ }
if (enable_uc_lb)
MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
@@ -157,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
tirn = tir->tirn;
err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
@@ -168,6 +176,7 @@ out:
kvfree(in);
if (err)
netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index a0987cc5fe4a..5efce4a3ff79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -603,16 +603,18 @@ static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
-static void ptys2ethtool_adver_link(struct mlx5_core_dev *mdev,
- unsigned long *advertising_modes,
- u32 eth_proto_cap)
+static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
+ u32 eth_proto_cap, bool ext)
{
unsigned long proto_cap = eth_proto_cap;
struct ptys2ethtool_config *table;
u32 max_size;
int proto;
- mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size);
+ table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
+ max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
+ ARRAY_SIZE(ptys2legacy_ethtool_table);
+
for_each_set_bit(proto, &proto_cap, max_size)
bitmap_or(advertising_modes, advertising_modes,
table[proto].advertised,
@@ -794,12 +796,12 @@ static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap,
ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
}
-static void get_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_cap,
- u8 tx_pause, u8 rx_pause,
- struct ethtool_link_ksettings *link_ksettings)
+static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause,
+ struct ethtool_link_ksettings *link_ksettings,
+ bool ext)
{
unsigned long *advertising = link_ksettings->link_modes.advertising;
- ptys2ethtool_adver_link(mdev, advertising, eth_proto_cap);
+ ptys2ethtool_adver_link(advertising, eth_proto_cap, ext);
if (rx_pause)
ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
@@ -854,8 +856,9 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
struct ethtool_link_ksettings *link_ksettings)
{
unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
+ bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
- ptys2ethtool_adver_link(mdev, lp_advertising, eth_proto_lp);
+ ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
}
int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
@@ -872,6 +875,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
u8 an_disable_admin;
u8 an_status;
u8 connector_type;
+ bool admin_ext;
bool ext;
int err;
@@ -886,6 +890,19 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
eth_proto_capability);
eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
eth_proto_admin);
+ /* Fields: eth_proto_admin and ext_eth_proto_admin are
+ * mutually exclusive. Hence try reading legacy advertising
+ * when extended advertising is zero.
+ * admin_ext indicates how eth_proto_admin should be
+ * interpreted
+ */
+ admin_ext = ext;
+ if (ext && !eth_proto_admin) {
+ eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false,
+ eth_proto_admin);
+ admin_ext = false;
+ }
+
eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
eth_proto_oper);
eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
@@ -899,7 +916,8 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
get_supported(mdev, eth_proto_cap, link_ksettings);
- get_advertising(mdev, eth_proto_admin, tx_pause, rx_pause, link_ksettings);
+ get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings,
+ admin_ext);
get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
@@ -997,19 +1015,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
#define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1)
- ext_requested = (link_ksettings->link_modes.advertising[0] >
- MLX5E_PTYS_EXT);
+ ext_requested = !!(link_ksettings->link_modes.advertising[0] >
+ MLX5E_PTYS_EXT ||
+ link_ksettings->link_modes.advertising[1]);
ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
-
- /*when ptys_extended_ethernet is set legacy link modes are deprecated */
- if (ext_requested != ext_supported)
- return -EPROTONOSUPPORT;
+ ext_requested &= ext_supported;
speed = link_ksettings->base.speed;
ethtool2ptys_adver_func = ext_requested ?
mlx5e_ethtool2ptys_ext_adver_link :
mlx5e_ethtool2ptys_adver_link;
- err = mlx5_port_query_eth_proto(mdev, 1, ext_supported, &eproto);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto);
if (err) {
netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
__func__, err);
@@ -1037,7 +1053,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
if (!an_changes && link_modes == eproto.admin)
goto out;
- mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_supported);
+ mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested);
mlx5_toggle_port_link(mdev);
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b4967a0ff8c7..d75dc44eb2ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2158,6 +2158,52 @@ static bool csum_offload_supported(struct mlx5e_priv *priv,
return true;
}
+struct ip_ttl_word {
+ __u8 ttl;
+ __u8 protocol;
+ __sum16 check;
+};
+
+struct ipv6_hoplimit_word {
+ __be16 payload_len;
+ __u8 nexthdr;
+ __u8 hop_limit;
+};
+
+static bool is_action_keys_supported(const struct flow_action_entry *act)
+{
+ u32 mask, offset;
+ u8 htype;
+
+ htype = act->mangle.htype;
+ offset = act->mangle.offset;
+ mask = ~act->mangle.mask;
+ /* For IPv4 & IPv6 header check 4 byte word,
+ * to determine that modified fields
+ * are NOT ttl & hop_limit only.
+ */
+ if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
+ struct ip_ttl_word *ttl_word =
+ (struct ip_ttl_word *)&mask;
+
+ if (offset != offsetof(struct iphdr, ttl) ||
+ ttl_word->protocol ||
+ ttl_word->check) {
+ return true;
+ }
+ } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+ struct ipv6_hoplimit_word *hoplimit_word =
+ (struct ipv6_hoplimit_word *)&mask;
+
+ if (offset != offsetof(struct ipv6hdr, payload_len) ||
+ hoplimit_word->payload_len ||
+ hoplimit_word->nexthdr) {
+ return true;
+ }
+ }
+ return false;
+}
+
static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
struct flow_action *flow_action,
u32 actions,
@@ -2165,9 +2211,9 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
{
const struct flow_action_entry *act;
bool modify_ip_header;
- u8 htype, ip_proto;
void *headers_v;
u16 ethertype;
+ u8 ip_proto;
int i;
if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)
@@ -2187,9 +2233,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
act->id != FLOW_ACTION_ADD)
continue;
- htype = act->mangle.htype;
- if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 ||
- htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
+ if (is_action_keys_supported(act)) {
modify_ip_header = true;
break;
}
@@ -2340,15 +2384,22 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
return 0;
}
-static inline int cmp_encap_info(struct ip_tunnel_key *a,
- struct ip_tunnel_key *b)
+struct encap_key {
+ struct ip_tunnel_key *ip_tun_key;
+ int tunnel_type;
+};
+
+static inline int cmp_encap_info(struct encap_key *a,
+ struct encap_key *b)
{
- return memcmp(a, b, sizeof(*a));
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
+ a->tunnel_type != b->tunnel_type;
}
-static inline int hash_encap_info(struct ip_tunnel_key *key)
+static inline int hash_encap_info(struct encap_key *key)
{
- return jhash(key, sizeof(*key), 0);
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tunnel_type);
}
@@ -2379,7 +2430,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct ip_tunnel_info *tun_info;
- struct ip_tunnel_key *key;
+ struct encap_key key, e_key;
struct mlx5e_encap_entry *e;
unsigned short family;
uintptr_t hash_key;
@@ -2389,13 +2440,16 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
tun_info = &parse_attr->tun_info[out_index];
family = ip_tunnel_info_af(tun_info);
- key = &tun_info->key;
+ key.ip_tun_key = &tun_info->key;
+ key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev);
- hash_key = hash_encap_info(key);
+ hash_key = hash_encap_info(&key);
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
encap_hlist, hash_key) {
- if (!cmp_encap_info(&e->tun_info.key, key)) {
+ e_key.ip_tun_key = &e->tun_info.key;
+ e_key.tunnel_type = e->tunnel_type;
+ if (!cmp_encap_info(&e_key, &key)) {
found = true;
break;
}
@@ -2657,7 +2711,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
- err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
+ err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
parse_attr, hdrs, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ecd2c747f726..8a67fd197b79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -105,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
@@ -134,8 +133,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
MLX5_SET(modify_esw_vport_context_in, in, opcode,
MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
@@ -431,6 +429,8 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw)
{
int err;
+ memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+
err = esw_create_legacy_vepa_table(esw);
if (err)
return err;
@@ -2157,6 +2157,7 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
/* Star rule to forward all traffic to uplink vport */
memset(spec, 0, sizeof(*spec));
+ memset(&dest, 0, sizeof(dest));
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport.num = MLX5_VPORT_UPLINK;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f2260391be5b..9b2d78ee22b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1611,6 +1611,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
+ memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
err = esw_create_offloads_fdb_tables(esw, nvports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index 5cf5f2a9d51f..8de64e88c670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -217,15 +217,21 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
void *cmd;
int ret;
+ rcu_read_lock();
+ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+ rcu_read_unlock();
+
+ if (!flow) {
+ WARN_ONCE(1, "Received NULL pointer for handle\n");
+ return -EINVAL;
+ }
+
buf = kzalloc(size, GFP_ATOMIC);
if (!buf)
return -ENOMEM;
cmd = (buf + 1);
- rcu_read_lock();
- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- rcu_read_unlock();
mlx5_fpga_tls_flow_to_cmd(flow, cmd);
MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
@@ -238,6 +244,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
buf->complete = mlx_tls_kfree_complete;
ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+ if (ret < 0)
+ kfree(buf);
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 70cc906a102b..76716419370d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -164,26 +164,6 @@ static struct mlx5_profile profile[] = {
.size = 8,
.limit = 4
},
- .mr_cache[16] = {
- .size = 8,
- .limit = 4
- },
- .mr_cache[17] = {
- .size = 8,
- .limit = 4
- },
- .mr_cache[18] = {
- .size = 8,
- .limit = 4
- },
- .mr_cache[19] = {
- .size = 4,
- .limit = 2
- },
- .mr_cache[20] = {
- .size = 4,
- .limit = 2
- },
},
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 7a15e932ed2f..c1c1965d7acc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -113,7 +113,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
return 0;
default:
/* Do not consider thresholds for zero temperature. */
- if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) {
+ if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) {
*temp = 0;
return 0;
}
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index bd6e9014bc74..7849119d407a 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -142,6 +142,12 @@ struct ks8851_net {
static int msg_enable;
+/* SPI frame opcodes */
+#define KS_SPIOP_RD (0x00)
+#define KS_SPIOP_WR (0x40)
+#define KS_SPIOP_RXFIFO (0x80)
+#define KS_SPIOP_TXFIFO (0xC0)
+
/* shift for byte-enable data */
#define BYTE_EN(_x) ((_x) << 2)
@@ -535,9 +541,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
/* set dma read address */
ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00);
- /* start the packet dma process, and set auto-dequeue rx */
- ks8851_wrreg16(ks, KS_RXQCR,
- ks->rc_rxqcr | RXQCR_SDA | RXQCR_ADRFE);
+ /* start DMA access */
+ ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
if (rxlen > 4) {
unsigned int rxalign;
@@ -568,7 +573,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
}
}
- ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
+ /* end DMA access and dequeue packet */
+ ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF);
}
}
@@ -785,6 +791,15 @@ static void ks8851_tx_work(struct work_struct *work)
static int ks8851_net_open(struct net_device *dev)
{
struct ks8851_net *ks = netdev_priv(dev);
+ int ret;
+
+ ret = request_threaded_irq(dev->irq, NULL, ks8851_irq,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ dev->name, ks);
+ if (ret < 0) {
+ netdev_err(dev, "failed to get irq\n");
+ return ret;
+ }
/* lock the card, even if we may not actually be doing anything
* else at the moment */
@@ -849,6 +864,7 @@ static int ks8851_net_open(struct net_device *dev)
netif_dbg(ks, ifup, ks->netdev, "network device up\n");
mutex_unlock(&ks->lock);
+ mii_check_link(&ks->mii);
return 0;
}
@@ -899,6 +915,8 @@ static int ks8851_net_stop(struct net_device *dev)
dev_kfree_skb(txb);
}
+ free_irq(dev->irq, ks);
+
return 0;
}
@@ -1508,6 +1526,7 @@ static int ks8851_probe(struct spi_device *spi)
spi_set_drvdata(spi, ks);
+ netif_carrier_off(ks->netdev);
ndev->if_port = IF_PORT_100BASET;
ndev->netdev_ops = &ks8851_netdev_ops;
ndev->irq = spi->irq;
@@ -1529,14 +1548,6 @@ static int ks8851_probe(struct spi_device *spi)
ks8851_read_selftest(ks);
ks8851_init_mac(ks);
- ret = request_threaded_irq(spi->irq, NULL, ks8851_irq,
- IRQF_TRIGGER_LOW | IRQF_ONESHOT,
- ndev->name, ks);
- if (ret < 0) {
- dev_err(&spi->dev, "failed to get irq\n");
- goto err_irq;
- }
-
ret = register_netdev(ndev);
if (ret) {
dev_err(&spi->dev, "failed to register network device\n");
@@ -1549,14 +1560,10 @@ static int ks8851_probe(struct spi_device *spi)
return 0;
-
err_netdev:
- free_irq(ndev->irq, ks);
-
-err_irq:
+err_id:
if (gpio_is_valid(gpio))
gpio_set_value(gpio, 0);
-err_id:
regulator_disable(ks->vdd_reg);
err_reg:
regulator_disable(ks->vdd_io);
@@ -1574,7 +1581,6 @@ static int ks8851_remove(struct spi_device *spi)
dev_info(&spi->dev, "remove\n");
unregister_netdev(priv->netdev);
- free_irq(spi->irq, priv);
if (gpio_is_valid(priv->gpio))
gpio_set_value(priv->gpio, 0);
regulator_disable(priv->vdd_reg);
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index 852256ef1f22..23da1e3ee429 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -11,9 +11,15 @@
*/
#define KS_CCR 0x08
+#define CCR_LE (1 << 10) /* KSZ8851-16MLL */
#define CCR_EEPROM (1 << 9)
-#define CCR_SPI (1 << 8)
-#define CCR_32PIN (1 << 0)
+#define CCR_SPI (1 << 8) /* KSZ8851SNL */
+#define CCR_8BIT (1 << 7) /* KSZ8851-16MLL */
+#define CCR_16BIT (1 << 6) /* KSZ8851-16MLL */
+#define CCR_32BIT (1 << 5) /* KSZ8851-16MLL */
+#define CCR_SHARED (1 << 4) /* KSZ8851-16MLL */
+#define CCR_48PIN (1 << 1) /* KSZ8851-16MLL */
+#define CCR_32PIN (1 << 0) /* KSZ8851SNL */
/* MAC address registers */
#define KS_MAR(_m) (0x15 - (_m))
@@ -112,13 +118,13 @@
#define RXCR1_RXE (1 << 0)
#define KS_RXCR2 0x76
-#define RXCR2_SRDBL_MASK (0x7 << 5)
-#define RXCR2_SRDBL_SHIFT (5)
-#define RXCR2_SRDBL_4B (0x0 << 5)
-#define RXCR2_SRDBL_8B (0x1 << 5)
-#define RXCR2_SRDBL_16B (0x2 << 5)
-#define RXCR2_SRDBL_32B (0x3 << 5)
-#define RXCR2_SRDBL_FRAME (0x4 << 5)
+#define RXCR2_SRDBL_MASK (0x7 << 5) /* KSZ8851SNL */
+#define RXCR2_SRDBL_SHIFT (5) /* KSZ8851SNL */
+#define RXCR2_SRDBL_4B (0x0 << 5) /* KSZ8851SNL */
+#define RXCR2_SRDBL_8B (0x1 << 5) /* KSZ8851SNL */
+#define RXCR2_SRDBL_16B (0x2 << 5) /* KSZ8851SNL */
+#define RXCR2_SRDBL_32B (0x3 << 5) /* KSZ8851SNL */
+#define RXCR2_SRDBL_FRAME (0x4 << 5) /* KSZ8851SNL */
#define RXCR2_IUFFP (1 << 4)
#define RXCR2_RXIUFCEZ (1 << 3)
#define RXCR2_UDPLFE (1 << 2)
@@ -143,8 +149,10 @@
#define RXFSHR_RXCE (1 << 0)
#define KS_RXFHBCR 0x7E
+#define RXFHBCR_CNT_MASK (0xfff << 0)
+
#define KS_TXQCR 0x80
-#define TXQCR_AETFE (1 << 2)
+#define TXQCR_AETFE (1 << 2) /* KSZ8851SNL */
#define TXQCR_TXQMAM (1 << 1)
#define TXQCR_METFE (1 << 0)
@@ -167,6 +175,10 @@
#define KS_RXFDPR 0x86
#define RXFDPR_RXFPAI (1 << 14)
+#define RXFDPR_WST (1 << 12) /* KSZ8851-16MLL */
+#define RXFDPR_EMS (1 << 11) /* KSZ8851-16MLL */
+#define RXFDPR_RXFP_MASK (0x7ff << 0)
+#define RXFDPR_RXFP_SHIFT (0)
#define KS_RXDTTR 0x8C
#define KS_RXDBCTR 0x8E
@@ -184,7 +196,7 @@
#define IRQ_RXMPDI (1 << 4)
#define IRQ_LDI (1 << 3)
#define IRQ_EDI (1 << 2)
-#define IRQ_SPIBEI (1 << 1)
+#define IRQ_SPIBEI (1 << 1) /* KSZ8851SNL */
#define IRQ_DEDI (1 << 0)
#define KS_RXFCTR 0x9C
@@ -257,42 +269,37 @@
#define KS_P1ANLPR 0xEE
#define KS_P1SCLMD 0xF4
-#define P1SCLMD_LEDOFF (1 << 15)
-#define P1SCLMD_TXIDS (1 << 14)
-#define P1SCLMD_RESTARTAN (1 << 13)
-#define P1SCLMD_DISAUTOMDIX (1 << 10)
-#define P1SCLMD_FORCEMDIX (1 << 9)
-#define P1SCLMD_AUTONEGEN (1 << 7)
-#define P1SCLMD_FORCE100 (1 << 6)
-#define P1SCLMD_FORCEFDX (1 << 5)
-#define P1SCLMD_ADV_FLOW (1 << 4)
-#define P1SCLMD_ADV_100BT_FDX (1 << 3)
-#define P1SCLMD_ADV_100BT_HDX (1 << 2)
-#define P1SCLMD_ADV_10BT_FDX (1 << 1)
-#define P1SCLMD_ADV_10BT_HDX (1 << 0)
#define KS_P1CR 0xF6
-#define P1CR_HP_MDIX (1 << 15)
-#define P1CR_REV_POL (1 << 13)
-#define P1CR_OP_100M (1 << 10)
-#define P1CR_OP_FDX (1 << 9)
-#define P1CR_OP_MDI (1 << 7)
-#define P1CR_AN_DONE (1 << 6)
-#define P1CR_LINK_GOOD (1 << 5)
-#define P1CR_PNTR_FLOW (1 << 4)
-#define P1CR_PNTR_100BT_FDX (1 << 3)
-#define P1CR_PNTR_100BT_HDX (1 << 2)
-#define P1CR_PNTR_10BT_FDX (1 << 1)
-#define P1CR_PNTR_10BT_HDX (1 << 0)
+#define P1CR_LEDOFF (1 << 15)
+#define P1CR_TXIDS (1 << 14)
+#define P1CR_RESTARTAN (1 << 13)
+#define P1CR_DISAUTOMDIX (1 << 10)
+#define P1CR_FORCEMDIX (1 << 9)
+#define P1CR_AUTONEGEN (1 << 7)
+#define P1CR_FORCE100 (1 << 6)
+#define P1CR_FORCEFDX (1 << 5)
+#define P1CR_ADV_FLOW (1 << 4)
+#define P1CR_ADV_100BT_FDX (1 << 3)
+#define P1CR_ADV_100BT_HDX (1 << 2)
+#define P1CR_ADV_10BT_FDX (1 << 1)
+#define P1CR_ADV_10BT_HDX (1 << 0)
+
+#define KS_P1SR 0xF8
+#define P1SR_HP_MDIX (1 << 15)
+#define P1SR_REV_POL (1 << 13)
+#define P1SR_OP_100M (1 << 10)
+#define P1SR_OP_FDX (1 << 9)
+#define P1SR_OP_MDI (1 << 7)
+#define P1SR_AN_DONE (1 << 6)
+#define P1SR_LINK_GOOD (1 << 5)
+#define P1SR_PNTR_FLOW (1 << 4)
+#define P1SR_PNTR_100BT_FDX (1 << 3)
+#define P1SR_PNTR_100BT_HDX (1 << 2)
+#define P1SR_PNTR_10BT_FDX (1 << 1)
+#define P1SR_PNTR_10BT_HDX (1 << 0)
/* TX Frame control */
-
#define TXFR_TXIC (1 << 15)
#define TXFR_TXFID_MASK (0x3f << 0)
#define TXFR_TXFID_SHIFT (0)
-
-/* SPI frame opcodes */
-#define KS_SPIOP_RD (0x00)
-#define KS_SPIOP_WR (0x40)
-#define KS_SPIOP_RXFIFO (0x80)
-#define KS_SPIOP_TXFIFO (0xC0)
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 35f8c9ef204d..c946841c0a06 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -40,6 +40,8 @@
#include <linux/of_device.h>
#include <linux/of_net.h>
+#include "ks8851.h"
+
#define DRV_NAME "ks8851_mll"
static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
@@ -48,319 +50,10 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
#define TX_BUF_SIZE 2000
#define RX_BUF_SIZE 2000
-#define KS_CCR 0x08
-#define CCR_EEPROM (1 << 9)
-#define CCR_SPI (1 << 8)
-#define CCR_8BIT (1 << 7)
-#define CCR_16BIT (1 << 6)
-#define CCR_32BIT (1 << 5)
-#define CCR_SHARED (1 << 4)
-#define CCR_32PIN (1 << 0)
-
-/* MAC address registers */
-#define KS_MARL 0x10
-#define KS_MARM 0x12
-#define KS_MARH 0x14
-
-#define KS_OBCR 0x20
-#define OBCR_ODS_16MA (1 << 6)
-
-#define KS_EEPCR 0x22
-#define EEPCR_EESA (1 << 4)
-#define EEPCR_EESB (1 << 3)
-#define EEPCR_EEDO (1 << 2)
-#define EEPCR_EESCK (1 << 1)
-#define EEPCR_EECS (1 << 0)
-
-#define KS_MBIR 0x24
-#define MBIR_TXMBF (1 << 12)
-#define MBIR_TXMBFA (1 << 11)
-#define MBIR_RXMBF (1 << 4)
-#define MBIR_RXMBFA (1 << 3)
-
-#define KS_GRR 0x26
-#define GRR_QMU (1 << 1)
-#define GRR_GSR (1 << 0)
-
-#define KS_WFCR 0x2A
-#define WFCR_MPRXE (1 << 7)
-#define WFCR_WF3E (1 << 3)
-#define WFCR_WF2E (1 << 2)
-#define WFCR_WF1E (1 << 1)
-#define WFCR_WF0E (1 << 0)
-
-#define KS_WF0CRC0 0x30
-#define KS_WF0CRC1 0x32
-#define KS_WF0BM0 0x34
-#define KS_WF0BM1 0x36
-#define KS_WF0BM2 0x38
-#define KS_WF0BM3 0x3A
-
-#define KS_WF1CRC0 0x40
-#define KS_WF1CRC1 0x42
-#define KS_WF1BM0 0x44
-#define KS_WF1BM1 0x46
-#define KS_WF1BM2 0x48
-#define KS_WF1BM3 0x4A
-
-#define KS_WF2CRC0 0x50
-#define KS_WF2CRC1 0x52
-#define KS_WF2BM0 0x54
-#define KS_WF2BM1 0x56
-#define KS_WF2BM2 0x58
-#define KS_WF2BM3 0x5A
-
-#define KS_WF3CRC0 0x60
-#define KS_WF3CRC1 0x62
-#define KS_WF3BM0 0x64
-#define KS_WF3BM1 0x66
-#define KS_WF3BM2 0x68
-#define KS_WF3BM3 0x6A
-
-#define KS_TXCR 0x70
-#define TXCR_TCGICMP (1 << 8)
-#define TXCR_TCGUDP (1 << 7)
-#define TXCR_TCGTCP (1 << 6)
-#define TXCR_TCGIP (1 << 5)
-#define TXCR_FTXQ (1 << 4)
-#define TXCR_TXFCE (1 << 3)
-#define TXCR_TXPE (1 << 2)
-#define TXCR_TXCRC (1 << 1)
-#define TXCR_TXE (1 << 0)
-
-#define KS_TXSR 0x72
-#define TXSR_TXLC (1 << 13)
-#define TXSR_TXMC (1 << 12)
-#define TXSR_TXFID_MASK (0x3f << 0)
-#define TXSR_TXFID_SHIFT (0)
-#define TXSR_TXFID_GET(_v) (((_v) >> 0) & 0x3f)
-
-
-#define KS_RXCR1 0x74
-#define RXCR1_FRXQ (1 << 15)
-#define RXCR1_RXUDPFCC (1 << 14)
-#define RXCR1_RXTCPFCC (1 << 13)
-#define RXCR1_RXIPFCC (1 << 12)
-#define RXCR1_RXPAFMA (1 << 11)
-#define RXCR1_RXFCE (1 << 10)
-#define RXCR1_RXEFE (1 << 9)
-#define RXCR1_RXMAFMA (1 << 8)
-#define RXCR1_RXBE (1 << 7)
-#define RXCR1_RXME (1 << 6)
-#define RXCR1_RXUE (1 << 5)
-#define RXCR1_RXAE (1 << 4)
-#define RXCR1_RXINVF (1 << 1)
-#define RXCR1_RXE (1 << 0)
#define RXCR1_FILTER_MASK (RXCR1_RXINVF | RXCR1_RXAE | \
RXCR1_RXMAFMA | RXCR1_RXPAFMA)
-
-#define KS_RXCR2 0x76
-#define RXCR2_SRDBL_MASK (0x7 << 5)
-#define RXCR2_SRDBL_SHIFT (5)
-#define RXCR2_SRDBL_4B (0x0 << 5)
-#define RXCR2_SRDBL_8B (0x1 << 5)
-#define RXCR2_SRDBL_16B (0x2 << 5)
-#define RXCR2_SRDBL_32B (0x3 << 5)
-/* #define RXCR2_SRDBL_FRAME (0x4 << 5) */
-#define RXCR2_IUFFP (1 << 4)
-#define RXCR2_RXIUFCEZ (1 << 3)
-#define RXCR2_UDPLFE (1 << 2)
-#define RXCR2_RXICMPFCC (1 << 1)
-#define RXCR2_RXSAF (1 << 0)
-
-#define KS_TXMIR 0x78
-
-#define KS_RXFHSR 0x7C
-#define RXFSHR_RXFV (1 << 15)
-#define RXFSHR_RXICMPFCS (1 << 13)
-#define RXFSHR_RXIPFCS (1 << 12)
-#define RXFSHR_RXTCPFCS (1 << 11)
-#define RXFSHR_RXUDPFCS (1 << 10)
-#define RXFSHR_RXBF (1 << 7)
-#define RXFSHR_RXMF (1 << 6)
-#define RXFSHR_RXUF (1 << 5)
-#define RXFSHR_RXMR (1 << 4)
-#define RXFSHR_RXFT (1 << 3)
-#define RXFSHR_RXFTL (1 << 2)
-#define RXFSHR_RXRF (1 << 1)
-#define RXFSHR_RXCE (1 << 0)
-#define RXFSHR_ERR (RXFSHR_RXCE | RXFSHR_RXRF |\
- RXFSHR_RXFTL | RXFSHR_RXMR |\
- RXFSHR_RXICMPFCS | RXFSHR_RXIPFCS |\
- RXFSHR_RXTCPFCS)
-#define KS_RXFHBCR 0x7E
-#define RXFHBCR_CNT_MASK 0x0FFF
-
-#define KS_TXQCR 0x80
-#define TXQCR_AETFE (1 << 2)
-#define TXQCR_TXQMAM (1 << 1)
-#define TXQCR_METFE (1 << 0)
-
-#define KS_RXQCR 0x82
-#define RXQCR_RXDTTS (1 << 12)
-#define RXQCR_RXDBCTS (1 << 11)
-#define RXQCR_RXFCTS (1 << 10)
-#define RXQCR_RXIPHTOE (1 << 9)
-#define RXQCR_RXDTTE (1 << 7)
-#define RXQCR_RXDBCTE (1 << 6)
-#define RXQCR_RXFCTE (1 << 5)
-#define RXQCR_ADRFE (1 << 4)
-#define RXQCR_SDA (1 << 3)
-#define RXQCR_RRXEF (1 << 0)
#define RXQCR_CMD_CNTL (RXQCR_RXFCTE|RXQCR_ADRFE)
-#define KS_TXFDPR 0x84
-#define TXFDPR_TXFPAI (1 << 14)
-#define TXFDPR_TXFP_MASK (0x7ff << 0)
-#define TXFDPR_TXFP_SHIFT (0)
-
-#define KS_RXFDPR 0x86
-#define RXFDPR_RXFPAI (1 << 14)
-
-#define KS_RXDTTR 0x8C
-#define KS_RXDBCTR 0x8E
-
-#define KS_IER 0x90
-#define KS_ISR 0x92
-#define IRQ_LCI (1 << 15)
-#define IRQ_TXI (1 << 14)
-#define IRQ_RXI (1 << 13)
-#define IRQ_RXOI (1 << 11)
-#define IRQ_TXPSI (1 << 9)
-#define IRQ_RXPSI (1 << 8)
-#define IRQ_TXSAI (1 << 6)
-#define IRQ_RXWFDI (1 << 5)
-#define IRQ_RXMPDI (1 << 4)
-#define IRQ_LDI (1 << 3)
-#define IRQ_EDI (1 << 2)
-#define IRQ_SPIBEI (1 << 1)
-#define IRQ_DEDI (1 << 0)
-
-#define KS_RXFCTR 0x9C
-#define RXFCTR_THRESHOLD_MASK 0x00FF
-
-#define KS_RXFC 0x9D
-#define RXFCTR_RXFC_MASK (0xff << 8)
-#define RXFCTR_RXFC_SHIFT (8)
-#define RXFCTR_RXFC_GET(_v) (((_v) >> 8) & 0xff)
-#define RXFCTR_RXFCT_MASK (0xff << 0)
-#define RXFCTR_RXFCT_SHIFT (0)
-
-#define KS_TXNTFSR 0x9E
-
-#define KS_MAHTR0 0xA0
-#define KS_MAHTR1 0xA2
-#define KS_MAHTR2 0xA4
-#define KS_MAHTR3 0xA6
-
-#define KS_FCLWR 0xB0
-#define KS_FCHWR 0xB2
-#define KS_FCOWR 0xB4
-
-#define KS_CIDER 0xC0
-#define CIDER_ID 0x8870
-#define CIDER_REV_MASK (0x7 << 1)
-#define CIDER_REV_SHIFT (1)
-#define CIDER_REV_GET(_v) (((_v) >> 1) & 0x7)
-
-#define KS_CGCR 0xC6
-#define KS_IACR 0xC8
-#define IACR_RDEN (1 << 12)
-#define IACR_TSEL_MASK (0x3 << 10)
-#define IACR_TSEL_SHIFT (10)
-#define IACR_TSEL_MIB (0x3 << 10)
-#define IACR_ADDR_MASK (0x1f << 0)
-#define IACR_ADDR_SHIFT (0)
-
-#define KS_IADLR 0xD0
-#define KS_IAHDR 0xD2
-
-#define KS_PMECR 0xD4
-#define PMECR_PME_DELAY (1 << 14)
-#define PMECR_PME_POL (1 << 12)
-#define PMECR_WOL_WAKEUP (1 << 11)
-#define PMECR_WOL_MAGICPKT (1 << 10)
-#define PMECR_WOL_LINKUP (1 << 9)
-#define PMECR_WOL_ENERGY (1 << 8)
-#define PMECR_AUTO_WAKE_EN (1 << 7)
-#define PMECR_WAKEUP_NORMAL (1 << 6)
-#define PMECR_WKEVT_MASK (0xf << 2)
-#define PMECR_WKEVT_SHIFT (2)
-#define PMECR_WKEVT_GET(_v) (((_v) >> 2) & 0xf)
-#define PMECR_WKEVT_ENERGY (0x1 << 2)
-#define PMECR_WKEVT_LINK (0x2 << 2)
-#define PMECR_WKEVT_MAGICPKT (0x4 << 2)
-#define PMECR_WKEVT_FRAME (0x8 << 2)
-#define PMECR_PM_MASK (0x3 << 0)
-#define PMECR_PM_SHIFT (0)
-#define PMECR_PM_NORMAL (0x0 << 0)
-#define PMECR_PM_ENERGY (0x1 << 0)
-#define PMECR_PM_SOFTDOWN (0x2 << 0)
-#define PMECR_PM_POWERSAVE (0x3 << 0)
-
-/* Standard MII PHY data */
-#define KS_P1MBCR 0xE4
-#define P1MBCR_FORCE_FDX (1 << 8)
-
-#define KS_P1MBSR 0xE6
-#define P1MBSR_AN_COMPLETE (1 << 5)
-#define P1MBSR_AN_CAPABLE (1 << 3)
-#define P1MBSR_LINK_UP (1 << 2)
-
-#define KS_PHY1ILR 0xE8
-#define KS_PHY1IHR 0xEA
-#define KS_P1ANAR 0xEC
-#define KS_P1ANLPR 0xEE
-
-#define KS_P1SCLMD 0xF4
-#define P1SCLMD_LEDOFF (1 << 15)
-#define P1SCLMD_TXIDS (1 << 14)
-#define P1SCLMD_RESTARTAN (1 << 13)
-#define P1SCLMD_DISAUTOMDIX (1 << 10)
-#define P1SCLMD_FORCEMDIX (1 << 9)
-#define P1SCLMD_AUTONEGEN (1 << 7)
-#define P1SCLMD_FORCE100 (1 << 6)
-#define P1SCLMD_FORCEFDX (1 << 5)
-#define P1SCLMD_ADV_FLOW (1 << 4)
-#define P1SCLMD_ADV_100BT_FDX (1 << 3)
-#define P1SCLMD_ADV_100BT_HDX (1 << 2)
-#define P1SCLMD_ADV_10BT_FDX (1 << 1)
-#define P1SCLMD_ADV_10BT_HDX (1 << 0)
-
-#define KS_P1CR 0xF6
-#define P1CR_HP_MDIX (1 << 15)
-#define P1CR_REV_POL (1 << 13)
-#define P1CR_OP_100M (1 << 10)
-#define P1CR_OP_FDX (1 << 9)
-#define P1CR_OP_MDI (1 << 7)
-#define P1CR_AN_DONE (1 << 6)
-#define P1CR_LINK_GOOD (1 << 5)
-#define P1CR_PNTR_FLOW (1 << 4)
-#define P1CR_PNTR_100BT_FDX (1 << 3)
-#define P1CR_PNTR_100BT_HDX (1 << 2)
-#define P1CR_PNTR_10BT_FDX (1 << 1)
-#define P1CR_PNTR_10BT_HDX (1 << 0)
-
-/* TX Frame control */
-
-#define TXFR_TXIC (1 << 15)
-#define TXFR_TXFID_MASK (0x3f << 0)
-#define TXFR_TXFID_SHIFT (0)
-
-#define KS_P1SR 0xF8
-#define P1SR_HP_MDIX (1 << 15)
-#define P1SR_REV_POL (1 << 13)
-#define P1SR_OP_100M (1 << 10)
-#define P1SR_OP_FDX (1 << 9)
-#define P1SR_OP_MDI (1 << 7)
-#define P1SR_AN_DONE (1 << 6)
-#define P1SR_LINK_GOOD (1 << 5)
-#define P1SR_PNTR_FLOW (1 << 4)
-#define P1SR_PNTR_100BT_FDX (1 << 3)
-#define P1SR_PNTR_100BT_HDX (1 << 2)
-#define P1SR_PNTR_10BT_FDX (1 << 1)
-#define P1SR_PNTR_10BT_HDX (1 << 0)
-
#define ENUM_BUS_NONE 0
#define ENUM_BUS_8BIT 1
#define ENUM_BUS_16BIT 2
@@ -1475,7 +1168,7 @@ static void ks_setup(struct ks_net *ks)
ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI);
/* Setup Receive Frame Threshold - 1 frame (RXFCTFC) */
- ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_THRESHOLD_MASK);
+ ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_RXFCT_MASK);
/* Setup RxQ Command Control (RXQCR) */
ks->rc_rxqcr = RXQCR_CMD_CNTL;
@@ -1488,7 +1181,7 @@ static void ks_setup(struct ks_net *ks)
*/
w = ks_rdreg16(ks, KS_P1MBCR);
- w &= ~P1MBCR_FORCE_FDX;
+ w &= ~BMCR_FULLDPLX;
ks_wrreg16(ks, KS_P1MBCR, w);
w = TXCR_TXFCE | TXCR_TXPE | TXCR_TXCRC | TXCR_TCGIP;
@@ -1629,7 +1322,7 @@ static int ks8851_probe(struct platform_device *pdev)
ks_setup_int(ks);
data = ks_rdreg16(ks, KS_OBCR);
- ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16MA);
+ ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA);
/* overwriting the default MAC address */
if (pdev->dev.of_node) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index eeda4ed98333..e336f6ee94f5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -48,8 +48,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
tmp_push_vlan_tci =
FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) |
- FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid) |
- NFP_FL_PUSH_VLAN_CFI;
+ FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid);
push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 4fcaf11ed56e..0ed51e79db00 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -26,7 +26,7 @@
#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6)
#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13)
-#define NFP_FLOWER_MASK_VLAN_CFI BIT(12)
+#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12)
#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0)
#define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12)
@@ -82,7 +82,6 @@
#define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0)
#define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13)
-#define NFP_FL_PUSH_VLAN_CFI BIT(12)
#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0)
#define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index e03c8ef2c28c..9b8b843d0340 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -30,20 +30,19 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
flow_rule_match_vlan(rule, &match);
/* Populate the tci field. */
- if (match.key->vlan_id || match.key->vlan_priority) {
- tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
- match.key->vlan_priority) |
- FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
- match.key->vlan_id) |
- NFP_FLOWER_MASK_VLAN_CFI;
- ext->tci = cpu_to_be16(tmp_tci);
- tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
- match.mask->vlan_priority) |
- FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
- match.mask->vlan_id) |
- NFP_FLOWER_MASK_VLAN_CFI;
- msk->tci = cpu_to_be16(tmp_tci);
- }
+ tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ match.key->vlan_priority) |
+ FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+ match.key->vlan_id);
+ ext->tci = cpu_to_be16(tmp_tci);
+
+ tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+ tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+ match.mask->vlan_priority) |
+ FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+ match.mask->vlan_id);
+ msk->tci = cpu_to_be16(tmp_tci);
}
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index d2c803bb4e56..94d228c04496 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -195,7 +195,7 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
ret = dev_queue_xmit(skb);
nfp_repr_inc_tx_stats(netdev, len, ret);
- return ret;
+ return NETDEV_TX_OK;
}
static int nfp_repr_stop(struct net_device *netdev)
@@ -383,7 +383,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS;
- netdev->priv_flags |= IFF_NO_QUEUE;
+ netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
netdev->features |= NETIF_F_LLTX;
if (nfp_app_has_tc(app)) {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 0c443ea98479..374a4d4371f9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -497,7 +497,7 @@ struct qlcnic_hardware_context {
u16 board_type;
u16 supported_type;
- u16 link_speed;
+ u32 link_speed;
u16 link_duplex;
u16 link_autoneg;
u16 module_type;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 3b0adda7cc9c..a4cd6f2cfb86 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1048,6 +1048,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) {
skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE);
+ if (!skb)
+ break;
qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
skb_put(skb, QLCNIC_ILB_PKT_SIZE);
adapter->ahw->diag_cnt = 0;
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index cfb67b746595..58e0ca9093d3 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -482,7 +482,7 @@ static void hardware_init(struct net_device *dev)
write_reg_high(ioaddr, IMR, ISRh_RxErr);
lp->tx_unit_busy = 0;
- lp->pac_cnt_in_tx_buf = 0;
+ lp->pac_cnt_in_tx_buf = 0;
lp->saved_tx_size = 0;
}
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index c29dde064078..ed651dde6ef9 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -28,6 +28,7 @@
#include <linux/pm_runtime.h>
#include <linux/firmware.h>
#include <linux/prefetch.h>
+#include <linux/pci-aspm.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
@@ -678,6 +679,7 @@ struct rtl8169_private {
struct work_struct work;
} wk;
+ unsigned irq_enabled:1;
unsigned supports_gmii:1;
dma_addr_t counters_phys_addr;
struct rtl8169_counters *counters;
@@ -1293,6 +1295,7 @@ static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
static void rtl_irq_disable(struct rtl8169_private *tp)
{
RTL_W16(tp, IntrMask, 0);
+ tp->irq_enabled = 0;
}
#define RTL_EVENT_NAPI_RX (RxOK | RxErr)
@@ -1301,6 +1304,7 @@ static void rtl_irq_disable(struct rtl8169_private *tp)
static void rtl_irq_enable(struct rtl8169_private *tp)
{
+ tp->irq_enabled = 1;
RTL_W16(tp, IntrMask, tp->irq_mask);
}
@@ -5457,7 +5461,7 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp)
tp->cp_cmd |= PktCntrDisable | INTT_1;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
- RTL_W16(tp, IntrMitigate, 0x5151);
+ RTL_W16(tp, IntrMitigate, 0x5100);
/* Work around for RxFIFO overflow. */
if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
@@ -6520,9 +6524,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
{
struct rtl8169_private *tp = dev_instance;
u16 status = RTL_R16(tp, IntrStatus);
- u16 irq_mask = RTL_R16(tp, IntrMask);
- if (status == 0xffff || !(status & irq_mask))
+ if (!tp->irq_enabled || status == 0xffff || !(status & tp->irq_mask))
return IRQ_NONE;
if (unlikely(status & SYSErr)) {
@@ -6540,7 +6543,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
}
- if (status & RTL_EVENT_NAPI) {
+ if (status & (RTL_EVENT_NAPI | LinkChg)) {
rtl_irq_disable(tp);
napi_schedule_irqoff(&tp->napi);
}
@@ -7350,6 +7353,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
return rc;
+ /* Disable ASPM completely as that cause random device stop working
+ * problems as well as full system hangs for some PCIe devices users.
+ */
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
+
/* enable device (incl. PCI PM wakeup and hotplug setup) */
rc = pcim_enable_device(pdev);
if (rc < 0) {
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 6073387511f8..67f9bb6e941b 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -730,10 +730,10 @@ static u16 sis900_default_phy(struct net_device * net_dev)
status = mdio_read(net_dev, phy->phy_addr, MII_STATUS);
/* Link ON & Not select default PHY & not ghost PHY */
- if ((status & MII_STAT_LINK) && !default_phy &&
- (phy->phy_types != UNKNOWN))
- default_phy = phy;
- else {
+ if ((status & MII_STAT_LINK) && !default_phy &&
+ (phy->phy_types != UNKNOWN)) {
+ default_phy = phy;
+ } else {
status = mdio_read(net_dev, phy->phy_addr, MII_CONTROL);
mdio_write(net_dev, phy->phy_addr, MII_CONTROL,
status | MII_CNTL_AUTO | MII_CNTL_ISOLATE);
@@ -741,7 +741,7 @@ static u16 sis900_default_phy(struct net_device * net_dev)
phy_home = phy;
else if(phy->phy_types == LAN)
phy_lan = phy;
- }
+ }
}
if (!default_phy && phy_home)
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index 40d6356a7e73..3dfb07a78952 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -29,11 +29,13 @@
/* Specific functions used for Ring mode */
/* Enhanced descriptors */
-static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end)
+static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end,
+ int bfsize)
{
- p->des1 |= cpu_to_le32((BUF_SIZE_8KiB
- << ERDES1_BUFFER2_SIZE_SHIFT)
- & ERDES1_BUFFER2_SIZE_MASK);
+ if (bfsize == BUF_SIZE_16KiB)
+ p->des1 |= cpu_to_le32((BUF_SIZE_8KiB
+ << ERDES1_BUFFER2_SIZE_SHIFT)
+ & ERDES1_BUFFER2_SIZE_MASK);
if (end)
p->des1 |= cpu_to_le32(ERDES1_END_RING);
@@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
}
/* Normal descriptors */
-static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end)
+static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize)
{
- p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1)
- << RDES1_BUFFER2_SIZE_SHIFT)
- & RDES1_BUFFER2_SIZE_MASK);
+ if (bfsize >= BUF_SIZE_2KiB) {
+ int bfsize2;
+
+ bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1);
+ p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT)
+ & RDES1_BUFFER2_SIZE_MASK);
+ }
if (end)
p->des1 |= cpu_to_le32(RDES1_END_RING);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 7fbb6a4dbf51..e061e9f5fad7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -296,7 +296,7 @@ exit:
}
static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
- int mode, int end)
+ int mode, int end, int bfsize)
{
dwmac4_set_rx_owner(p, disable_rx_ic);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 1d858fdec997..98fa471da7c0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -123,7 +123,7 @@ static int dwxgmac2_get_rx_timestamp_status(void *desc, void *next_desc,
}
static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
- int mode, int end)
+ int mode, int end, int bfsize)
{
dwxgmac2_set_rx_owner(p, disable_rx_ic);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 5ef91a790f9d..5202d6ad7919 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -201,6 +201,11 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
if (unlikely(rdes0 & RDES0_OWN))
return dma_own;
+ if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
+ stats->rx_length_errors++;
+ return discard_frame;
+ }
+
if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) {
x->rx_desc++;
@@ -231,9 +236,10 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
* It doesn't match with the information reported into the databook.
* At any rate, we need to understand if the CSUM hw computation is ok
* and report this info to the upper layers. */
- ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR),
- !!(rdes0 & RDES0_FRAME_TYPE),
- !!(rdes0 & ERDES0_RX_MAC_ADDR));
+ if (likely(ret == good_frame))
+ ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR),
+ !!(rdes0 & RDES0_FRAME_TYPE),
+ !!(rdes0 & ERDES0_RX_MAC_ADDR));
if (unlikely(rdes0 & RDES0_DRIBBLING))
x->dribbling_bit++;
@@ -259,15 +265,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
}
static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
- int mode, int end)
+ int mode, int end, int bfsize)
{
+ int bfsize1;
+
p->des0 |= cpu_to_le32(RDES0_OWN);
- p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK);
+
+ bfsize1 = min(bfsize, BUF_SIZE_8KiB);
+ p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK);
if (mode == STMMAC_CHAIN_MODE)
ehn_desc_rx_set_on_chain(p);
else
- ehn_desc_rx_set_on_ring(p, end);
+ ehn_desc_rx_set_on_ring(p, end, bfsize);
if (disable_rx_ic)
p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC);
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 92b8944f26e3..5bb00234d961 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -33,7 +33,7 @@ struct dma_extended_desc;
struct stmmac_desc_ops {
/* DMA RX descriptor ring initialization */
void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode,
- int end);
+ int end, int bfsize);
/* DMA TX descriptor ring initialization */
void (*init_tx_desc)(struct dma_desc *p, int mode, int end);
/* Invoked by the xmit function to prepare the tx descriptor */
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index de65bb29feba..b7dd4e3c760d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -91,8 +91,6 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
return dma_own;
if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
- pr_warn("%s: Oversized frame spanned multiple buffers\n",
- __func__);
stats->rx_length_errors++;
return discard_frame;
}
@@ -135,15 +133,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
}
static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
- int end)
+ int end, int bfsize)
{
+ int bfsize1;
+
p->des0 |= cpu_to_le32(RDES0_OWN);
- p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK);
+
+ bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
+ p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK);
if (mode == STMMAC_CHAIN_MODE)
ndesc_rx_set_on_chain(p, end);
else
- ndesc_rx_set_on_ring(p, end);
+ ndesc_rx_set_on_ring(p, end, bfsize);
if (disable_rx_ic)
p->des1 |= cpu_to_le32(RDES1_DISABLE_IC);
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index d8c5bc412219..4d9bcb4d0378 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -59,7 +59,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum,
- STMMAC_RING_MODE, 1, false, skb->len);
+ STMMAC_RING_MODE, 0, false, skb->len);
tx_q->tx_skbuff[entry] = NULL;
entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
@@ -79,7 +79,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
stmmac_prepare_tx_desc(priv, desc, 0, len, csum,
- STMMAC_RING_MODE, 1, true, skb->len);
+ STMMAC_RING_MODE, 1, !skb_is_nonlinear(skb),
+ skb->len);
} else {
des2 = dma_map_single(priv->device, skb->data,
nopaged_len, DMA_TO_DEVICE);
@@ -91,7 +92,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum)
tx_q->tx_skbuff_dma[entry].is_jumbo = true;
desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum,
- STMMAC_RING_MODE, 1, true, skb->len);
+ STMMAC_RING_MODE, 0, !skb_is_nonlinear(skb),
+ skb->len);
}
tx_q->cur_tx = entry;
@@ -111,10 +113,11 @@ static unsigned int is_jumbo_frm(int len, int enh_desc)
static void refill_desc3(void *priv_ptr, struct dma_desc *p)
{
- struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
+ struct stmmac_rx_queue *rx_q = priv_ptr;
+ struct stmmac_priv *priv = rx_q->priv_data;
/* Fill DES3 in case of RING mode */
- if (priv->dma_buf_sz >= BUF_SIZE_8KiB)
+ if (priv->dma_buf_sz == BUF_SIZE_16KiB)
p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 97c5e1aad88f..a26e36dbb5df 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1136,11 +1136,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue)
if (priv->extend_desc)
stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic,
priv->use_riwt, priv->mode,
- (i == DMA_RX_SIZE - 1));
+ (i == DMA_RX_SIZE - 1),
+ priv->dma_buf_sz);
else
stmmac_init_rx_desc(priv, &rx_q->dma_rx[i],
priv->use_riwt, priv->mode,
- (i == DMA_RX_SIZE - 1));
+ (i == DMA_RX_SIZE - 1),
+ priv->dma_buf_sz);
}
/**
@@ -3216,14 +3218,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
stmmac_prepare_tx_desc(priv, first, 1, nopaged_len,
csum_insertion, priv->mode, 1, last_segment,
skb->len);
-
- /* The own bit must be the latest setting done when prepare the
- * descriptor and then barrier is needed to make sure that
- * all is coherent before granting the DMA engine.
- */
- wmb();
+ } else {
+ stmmac_set_tx_owner(priv, first);
}
+ /* The own bit must be the latest setting done when prepare the
+ * descriptor and then barrier is needed to make sure that
+ * all is coherent before granting the DMA engine.
+ */
+ wmb();
+
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -3350,9 +3354,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
{
struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
struct stmmac_channel *ch = &priv->channel[queue];
- unsigned int entry = rx_q->cur_rx;
+ unsigned int next_entry = rx_q->cur_rx;
int coe = priv->hw->rx_csum;
- unsigned int next_entry;
unsigned int count = 0;
bool xmac;
@@ -3370,10 +3373,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
}
while (count < limit) {
- int status;
+ int entry, status;
struct dma_desc *p;
struct dma_desc *np;
+ entry = next_entry;
+
if (priv->extend_desc)
p = (struct dma_desc *)(rx_q->dma_erx + entry);
else
@@ -3429,11 +3434,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
* ignored
*/
if (frame_len > priv->dma_buf_sz) {
- netdev_err(priv->dev,
- "len %d larger than size (%d)\n",
- frame_len, priv->dma_buf_sz);
+ if (net_ratelimit())
+ netdev_err(priv->dev,
+ "len %d larger than size (%d)\n",
+ frame_len, priv->dma_buf_sz);
priv->dev->stats.rx_length_errors++;
- break;
+ continue;
}
/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
@@ -3468,7 +3474,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
dev_warn(priv->device,
"packet dropped\n");
priv->dev->stats.rx_dropped++;
- break;
+ continue;
}
dma_sync_single_for_cpu(priv->device,
@@ -3488,11 +3494,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
} else {
skb = rx_q->rx_skbuff[entry];
if (unlikely(!skb)) {
- netdev_err(priv->dev,
- "%s: Inconsistent Rx chain\n",
- priv->dev->name);
+ if (net_ratelimit())
+ netdev_err(priv->dev,
+ "%s: Inconsistent Rx chain\n",
+ priv->dev->name);
priv->dev->stats.rx_dropped++;
- break;
+ continue;
}
prefetch(skb->data - NET_IP_ALIGN);
rx_q->rx_skbuff[entry] = NULL;
@@ -3527,7 +3534,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
priv->dev->stats.rx_packets++;
priv->dev->stats.rx_bytes += frame_len;
}
- entry = next_entry;
}
stmmac_rx_refill(priv, queue);
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 5174d318901e..0a920c5936b2 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3657,12 +3657,16 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device,
gbe_dev->dma_chan_name, gbe_dev->tx_queue_id);
- if (ret)
+ if (ret) {
+ of_node_put(interfaces);
return ret;
+ }
ret = netcp_txpipe_open(&gbe_dev->tx_pipe);
- if (ret)
+ if (ret) {
+ of_node_put(interfaces);
return ret;
+ }
/* Create network interfaces */
INIT_LIST_HEAD(&gbe_dev->gbe_intf_head);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index ec7e7ec24ff9..4041c75997ba 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1575,12 +1575,14 @@ static int axienet_probe(struct platform_device *pdev)
ret = of_address_to_resource(np, 0, &dmares);
if (ret) {
dev_err(&pdev->dev, "unable to get DMA resource\n");
+ of_node_put(np);
goto free_netdev;
}
lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares);
if (IS_ERR(lp->dma_regs)) {
dev_err(&pdev->dev, "could not map DMA regs\n");
ret = PTR_ERR(lp->dma_regs);
+ of_node_put(np);
goto free_netdev;
}
lp->rx_irq = irq_of_parse_and_map(np, 1);
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index e859ae2e42d5..49f41b64077b 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -987,6 +987,7 @@ struct netvsc_device {
wait_queue_head_t wait_drain;
bool destroy;
+ bool tx_disable; /* if true, do not wake up queue again */
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 813d195bbd57..e0dce373cdd9 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -110,6 +110,7 @@ static struct netvsc_device *alloc_net_device(void)
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
+ net_device->tx_disable = false;
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
@@ -719,7 +720,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
} else {
struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
- if (netif_tx_queue_stopped(txq) &&
+ if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
(hv_get_avail_to_write_percent(&channel->outbound) >
RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
netif_tx_wake_queue(txq);
@@ -874,7 +875,8 @@ static inline int netvsc_send_pkt(
} else if (ret == -EAGAIN) {
netif_tx_stop_queue(txq);
ndev_ctx->eth_stats.stop_queue++;
- if (atomic_read(&nvchan->queue_sends) < 1) {
+ if (atomic_read(&nvchan->queue_sends) < 1 &&
+ !net_device->tx_disable) {
netif_tx_wake_queue(txq);
ndev_ctx->eth_stats.wake_queue++;
ret = -ENOSPC;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index cf4897043e83..b20fb0fb595b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -109,6 +109,15 @@ static void netvsc_set_rx_mode(struct net_device *net)
rcu_read_unlock();
}
+static void netvsc_tx_enable(struct netvsc_device *nvscdev,
+ struct net_device *ndev)
+{
+ nvscdev->tx_disable = false;
+ virt_wmb(); /* ensure queue wake up mechanism is on */
+
+ netif_tx_wake_all_queues(ndev);
+}
+
static int netvsc_open(struct net_device *net)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
@@ -129,7 +138,7 @@ static int netvsc_open(struct net_device *net)
rdev = nvdev->extension;
if (!rdev->link_state) {
netif_carrier_on(net);
- netif_tx_wake_all_queues(net);
+ netvsc_tx_enable(nvdev, net);
}
if (vf_netdev) {
@@ -184,6 +193,17 @@ static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
}
}
+static void netvsc_tx_disable(struct netvsc_device *nvscdev,
+ struct net_device *ndev)
+{
+ if (nvscdev) {
+ nvscdev->tx_disable = true;
+ virt_wmb(); /* ensure txq will not wake up after stop */
+ }
+
+ netif_tx_disable(ndev);
+}
+
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -192,7 +212,7 @@ static int netvsc_close(struct net_device *net)
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
int ret;
- netif_tx_disable(net);
+ netvsc_tx_disable(nvdev, net);
/* No need to close rndis filter if it is removed already */
if (!nvdev)
@@ -920,7 +940,7 @@ static int netvsc_detach(struct net_device *ndev,
/* If device was up (receiving) then shutdown */
if (netif_running(ndev)) {
- netif_tx_disable(ndev);
+ netvsc_tx_disable(nvdev, ndev);
ret = rndis_filter_close(nvdev);
if (ret) {
@@ -1908,7 +1928,7 @@ static void netvsc_link_change(struct work_struct *w)
if (rdev->link_state) {
rdev->link_state = false;
netif_carrier_on(net);
- netif_tx_wake_all_queues(net);
+ netvsc_tx_enable(net_device, net);
} else {
notify = true;
}
@@ -1918,7 +1938,7 @@ static void netvsc_link_change(struct work_struct *w)
if (!rdev->link_state) {
rdev->link_state = true;
netif_carrier_off(net);
- netif_tx_stop_all_queues(net);
+ netvsc_tx_disable(net_device, net);
}
kfree(event);
break;
@@ -1927,7 +1947,7 @@ static void netvsc_link_change(struct work_struct *w)
if (!rdev->link_state) {
rdev->link_state = true;
netif_carrier_off(net);
- netif_tx_stop_all_queues(net);
+ netvsc_tx_disable(net_device, net);
event->event = RNDIS_STATUS_MEDIA_CONNECT;
spin_lock_irqsave(&ndev_ctx->lock, flags);
list_add(&event->list, &ndev_ctx->reconfig_events);
diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
index cd1d8faccca5..cd6b95e673a5 100644
--- a/drivers/net/ieee802154/adf7242.c
+++ b/drivers/net/ieee802154/adf7242.c
@@ -1268,6 +1268,10 @@ static int adf7242_probe(struct spi_device *spi)
INIT_DELAYED_WORK(&lp->work, adf7242_rx_cal_work);
lp->wqueue = alloc_ordered_workqueue(dev_name(&spi->dev),
WQ_MEM_RECLAIM);
+ if (unlikely(!lp->wqueue)) {
+ ret = -ENOMEM;
+ goto err_hw_init;
+ }
ret = adf7242_hw_init(lp);
if (ret)
diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index b6743f03dce0..3b88846de31b 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -324,7 +324,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
goto out_err;
}
- genlmsg_reply(skb, info);
+ res = genlmsg_reply(skb, info);
break;
}
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 071869db44cf..520657945b82 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -7,6 +7,8 @@ menuconfig MDIO_DEVICE
help
MDIO devices and driver infrastructure code.
+if MDIO_DEVICE
+
config MDIO_BUS
tristate
default m if PHYLIB=m
@@ -179,6 +181,7 @@ config MDIO_XGENE
APM X-Gene SoC's.
endif
+endif
config PHYLINK
tristate
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 9605d4fe540b..cb86a3e90c7d 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -323,6 +323,19 @@ static int bcm54xx_config_init(struct phy_device *phydev)
bcm54xx_phydsp_config(phydev);
+ /* Encode link speed into LED1 and LED3 pair (green/amber).
+ * Also flash these two LEDs on activity. This means configuring
+ * them for MULTICOLOR and encoding link/activity into them.
+ */
+ val = BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_MULTICOLOR1) |
+ BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_MULTICOLOR1);
+ bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, val);
+
+ val = BCM_LED_MULTICOLOR_IN_PHASE |
+ BCM5482_SHD_LEDS1_LED1(BCM_LED_MULTICOLOR_LINK_ACT) |
+ BCM5482_SHD_LEDS1_LED3(BCM_LED_MULTICOLOR_LINK_ACT);
+ bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val);
+
return 0;
}
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index bbd8c22067f3..97d45bd5b38e 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -15,6 +15,8 @@
#include <linux/netdevice.h>
#define DP83822_PHY_ID 0x2000a240
+#define DP83825I_PHY_ID 0x2000a150
+
#define DP83822_DEVADDR 0x1f
#define MII_DP83822_PHYSCR 0x11
@@ -304,26 +306,30 @@ static int dp83822_resume(struct phy_device *phydev)
return 0;
}
+#define DP83822_PHY_DRIVER(_id, _name) \
+ { \
+ PHY_ID_MATCH_MODEL(_id), \
+ .name = (_name), \
+ .features = PHY_BASIC_FEATURES, \
+ .soft_reset = dp83822_phy_reset, \
+ .config_init = dp83822_config_init, \
+ .get_wol = dp83822_get_wol, \
+ .set_wol = dp83822_set_wol, \
+ .ack_interrupt = dp83822_ack_interrupt, \
+ .config_intr = dp83822_config_intr, \
+ .suspend = dp83822_suspend, \
+ .resume = dp83822_resume, \
+ }
+
static struct phy_driver dp83822_driver[] = {
- {
- .phy_id = DP83822_PHY_ID,
- .phy_id_mask = 0xfffffff0,
- .name = "TI DP83822",
- .features = PHY_BASIC_FEATURES,
- .config_init = dp83822_config_init,
- .soft_reset = dp83822_phy_reset,
- .get_wol = dp83822_get_wol,
- .set_wol = dp83822_set_wol,
- .ack_interrupt = dp83822_ack_interrupt,
- .config_intr = dp83822_config_intr,
- .suspend = dp83822_suspend,
- .resume = dp83822_resume,
- },
+ DP83822_PHY_DRIVER(DP83822_PHY_ID, "TI DP83822"),
+ DP83822_PHY_DRIVER(DP83825I_PHY_ID, "TI DP83825I"),
};
module_phy_driver(dp83822_driver);
static struct mdio_device_id __maybe_unused dp83822_tbl[] = {
{ DP83822_PHY_ID, 0xfffffff0 },
+ { DP83825I_PHY_ID, 0xfffffff0 },
{ },
};
MODULE_DEVICE_TABLE(mdio, dp83822_tbl);
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
index a238388eb1a5..0eec2913c289 100644
--- a/drivers/net/phy/meson-gxl.c
+++ b/drivers/net/phy/meson-gxl.c
@@ -201,6 +201,7 @@ static int meson_gxl_ack_interrupt(struct phy_device *phydev)
static int meson_gxl_config_intr(struct phy_device *phydev)
{
u16 val;
+ int ret;
if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
val = INTSRC_ANEG_PR
@@ -213,6 +214,11 @@ static int meson_gxl_config_intr(struct phy_device *phydev)
val = 0;
}
+ /* Ack any pending IRQ */
+ ret = meson_gxl_ack_interrupt(phydev);
+ if (ret)
+ return ret;
+
return phy_write(phydev, INTSRC_MASK, val);
}
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 49fdd1ee798e..77068c545de0 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1831,7 +1831,7 @@ int genphy_soft_reset(struct phy_device *phydev)
{
int ret;
- ret = phy_write(phydev, MII_BMCR, BMCR_RESET);
+ ret = phy_set_bits(phydev, MII_BMCR, BMCR_RESET);
if (ret < 0)
return ret;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 1d68921723dc..e9ca1c088d0b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1763,9 +1763,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
int skb_xdp = 1;
bool frags = tun_napi_frags_enabled(tfile);
- if (!(tun->dev->flags & IFF_UP))
- return -EIO;
-
if (!(tun->flags & IFF_NO_PI)) {
if (len < sizeof(pi))
return -EINVAL;
@@ -1867,6 +1864,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
err = skb_copy_datagram_from_iter(skb, 0, from, len);
if (err) {
+ err = -EFAULT;
+drop:
this_cpu_inc(tun->pcpu_stats->rx_dropped);
kfree_skb(skb);
if (frags) {
@@ -1874,7 +1873,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
mutex_unlock(&tfile->napi_mutex);
}
- return -EFAULT;
+ return err;
}
}
@@ -1958,6 +1957,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
!tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
+ rcu_read_lock();
+ if (unlikely(!(tun->dev->flags & IFF_UP))) {
+ err = -EIO;
+ rcu_read_unlock();
+ goto drop;
+ }
+
if (frags) {
/* Exercise flow dissector code path. */
u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
@@ -1965,6 +1971,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (unlikely(headlen > skb_headlen(skb))) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
napi_free_frags(&tfile->napi);
+ rcu_read_unlock();
mutex_unlock(&tfile->napi_mutex);
WARN_ON(1);
return -ENOMEM;
@@ -1992,6 +1999,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
} else {
netif_rx_ni(skb);
}
+ rcu_read_unlock();
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
index 820a2fe7d027..aff995be2a31 100644
--- a/drivers/net/usb/aqc111.c
+++ b/drivers/net/usb/aqc111.c
@@ -1301,6 +1301,20 @@ static const struct driver_info trendnet_info = {
.tx_fixup = aqc111_tx_fixup,
};
+static const struct driver_info qnap_info = {
+ .description = "QNAP QNA-UC5G1T USB to 5GbE Adapter",
+ .bind = aqc111_bind,
+ .unbind = aqc111_unbind,
+ .status = aqc111_status,
+ .link_reset = aqc111_link_reset,
+ .reset = aqc111_reset,
+ .stop = aqc111_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX |
+ FLAG_AVOID_UNLINK_URBS | FLAG_MULTI_PACKET,
+ .rx_fixup = aqc111_rx_fixup,
+ .tx_fixup = aqc111_tx_fixup,
+};
+
static int aqc111_suspend(struct usb_interface *intf, pm_message_t message)
{
struct usbnet *dev = usb_get_intfdata(intf);
@@ -1455,6 +1469,7 @@ static const struct usb_device_id products[] = {
{AQC111_USB_ETH_DEV(0x0b95, 0x2790, asix111_info)},
{AQC111_USB_ETH_DEV(0x0b95, 0x2791, asix112_info)},
{AQC111_USB_ETH_DEV(0x20f4, 0xe05a, trendnet_info)},
+ {AQC111_USB_ETH_DEV(0x1c04, 0x0015, qnap_info)},
{ },/* END */
};
MODULE_DEVICE_TABLE(usb, products);
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 5512a1038721..3e9b2c319e45 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -851,6 +851,14 @@ static const struct usb_device_id products[] = {
.driver_info = 0,
},
+/* QNAP QNA-UC5G1T USB to 5GbE Adapter (based on AQC111U) */
+{
+ USB_DEVICE_AND_INTERFACE_INFO(0x1c04, 0x0015, USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_ETHERNET,
+ USB_CDC_PROTO_NONE),
+ .driver_info = 0,
+},
+
/* WHITELIST!!!
*
* CDC Ether uses two interfaces, not necessarily consecutive.
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 74bebbdb4b15..9195f3476b1d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1203,6 +1203,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */
{QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */
{QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */
+ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */
{QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */
{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */
{QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 7c1430ed0244..cd15c32b2e43 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1273,9 +1273,14 @@ static void vrf_setup(struct net_device *dev)
/* default to no qdisc; user can add if desired */
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_NO_RX_HANDLER;
- dev->min_mtu = 0;
- dev->max_mtu = 0;
+ /* VRF devices do not care about MTU, but if the MTU is set
+ * too low then the ipv4 and ipv6 protocols are disabled
+ * which breaks networking.
+ */
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = ETH_MAX_MTU;
}
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 077f1b9f2761..d76dfed8d9bb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -4335,10 +4335,8 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
/* If vxlan->dev is in the same netns, it has already been added
* to the list by the previous loop.
*/
- if (!net_eq(dev_net(vxlan->dev), net)) {
- gro_cells_destroy(&vxlan->gro_cells);
+ if (!net_eq(dev_net(vxlan->dev), net))
unregister_netdevice_queue(vxlan->dev, head);
- }
}
for (h = 0; h < PORT_HASH_SIZE; ++h)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index e9822a3ec373..94132cfd1f56 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -460,9 +460,7 @@ static int iwl_mvm_ftm_range_resp_valid(struct iwl_mvm *mvm, u8 request_id,
static void iwl_mvm_debug_range_resp(struct iwl_mvm *mvm, u8 index,
struct cfg80211_pmsr_result *res)
{
- s64 rtt_avg = res->ftm.rtt_avg * 100;
-
- do_div(rtt_avg, 6666);
+ s64 rtt_avg = div_s64(res->ftm.rtt_avg * 100, 6666);
IWL_DEBUG_INFO(mvm, "entry %d\n", index);
IWL_DEBUG_INFO(mvm, "\tstatus: %d\n", res->status);
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 6eedc0ec7661..76629b98c78d 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -130,6 +130,8 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
static void
mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
{
+ iowrite32(q->desc_dma, &q->regs->desc_base);
+ iowrite32(q->ndesc, &q->regs->ring_size);
q->head = ioread32(&q->regs->dma_idx);
q->tail = q->head;
iowrite32(q->head, &q->regs->cpu_idx);
@@ -180,7 +182,10 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush)
else
mt76_dma_sync_idx(dev, q);
- wake = wake && qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8;
+ wake = wake && q->stopped &&
+ qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8;
+ if (wake)
+ q->stopped = false;
if (!q->queued)
wake_up(&dev->tx_wait);
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index a033745adb2f..316167404729 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -679,19 +679,15 @@ out:
return ret;
}
-static void
-mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif,
- struct ieee80211_sta *sta)
+void __mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
{
struct mt76_wcid *wcid = (struct mt76_wcid *)sta->drv_priv;
- int idx = wcid->idx;
- int i;
+ int i, idx = wcid->idx;
rcu_assign_pointer(dev->wcid[idx], NULL);
synchronize_rcu();
- mutex_lock(&dev->mutex);
-
if (dev->drv->sta_remove)
dev->drv->sta_remove(dev, vif, sta);
@@ -699,7 +695,15 @@ mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif,
for (i = 0; i < ARRAY_SIZE(sta->txq); i++)
mt76_txq_remove(dev, sta->txq[i]);
mt76_wcid_free(dev->wcid_mask, idx);
+}
+EXPORT_SYMBOL_GPL(__mt76_sta_remove);
+static void
+mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta)
+{
+ mutex_lock(&dev->mutex);
+ __mt76_sta_remove(dev, vif, sta);
mutex_unlock(&dev->mutex);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 5dfb0601f101..bcbfd3c4a44b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -126,6 +126,7 @@ struct mt76_queue {
int ndesc;
int queued;
int buf_size;
+ bool stopped;
u8 buf_offset;
u8 hw_idx;
@@ -143,6 +144,7 @@ struct mt76_mcu_ops {
const struct mt76_reg_pair *rp, int len);
int (*mcu_rd_rp)(struct mt76_dev *dev, u32 base,
struct mt76_reg_pair *rp, int len);
+ int (*mcu_restart)(struct mt76_dev *dev);
};
struct mt76_queue_ops {
@@ -693,6 +695,8 @@ int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
enum ieee80211_sta_state old_state,
enum ieee80211_sta_state new_state);
+void __mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta);
struct ieee80211_sta *mt76_rx_convert(struct sk_buff *skb);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c b/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c
index afcd86f735b4..4dcb465095d1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c
@@ -135,8 +135,7 @@ void mt7603_pre_tbtt_tasklet(unsigned long arg)
out:
mt76_queue_tx_cleanup(dev, MT_TXQ_BEACON, false);
- if (dev->mt76.q_tx[MT_TXQ_BEACON].queued >
- __sw_hweight8(dev->beacon_mask))
+ if (dev->mt76.q_tx[MT_TXQ_BEACON].queued > hweight8(dev->beacon_mask))
dev->beacon_check++;
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
index d69e82c66ab2..b3ae0aaea62a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
@@ -27,12 +27,16 @@ static void
mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb)
{
__le32 *txd = (__le32 *)skb->data;
+ struct ieee80211_hdr *hdr;
+ struct ieee80211_sta *sta;
struct mt7603_sta *msta;
struct mt76_wcid *wcid;
+ void *priv;
int idx;
u32 val;
+ u8 tid;
- if (skb->len < sizeof(MT_TXD_SIZE) + sizeof(struct ieee80211_hdr))
+ if (skb->len < MT_TXD_SIZE + sizeof(struct ieee80211_hdr))
goto free;
val = le32_to_cpu(txd[1]);
@@ -46,10 +50,19 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb)
if (!wcid)
goto free;
- msta = container_of(wcid, struct mt7603_sta, wcid);
+ priv = msta = container_of(wcid, struct mt7603_sta, wcid);
val = le32_to_cpu(txd[0]);
skb_set_queue_mapping(skb, FIELD_GET(MT_TXD0_Q_IDX, val));
+ val &= ~(MT_TXD0_P_IDX | MT_TXD0_Q_IDX);
+ val |= FIELD_PREP(MT_TXD0_Q_IDX, MT_TX_HW_QUEUE_MGMT);
+ txd[0] = cpu_to_le32(val);
+
+ sta = container_of(priv, struct ieee80211_sta, drv_priv);
+ hdr = (struct ieee80211_hdr *) &skb->data[MT_TXD_SIZE];
+ tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+ ieee80211_sta_set_buffered(sta, tid, true);
+
spin_lock_bh(&dev->ps_lock);
__skb_queue_tail(&msta->psq, skb);
if (skb_queue_len(&msta->psq) >= 64) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
index 15cc8f33b34d..d54dda67d036 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
@@ -112,7 +112,7 @@ static void
mt7603_phy_init(struct mt7603_dev *dev)
{
int rx_chains = dev->mt76.antenna_mask;
- int tx_chains = __sw_hweight8(rx_chains) - 1;
+ int tx_chains = hweight8(rx_chains) - 1;
mt76_rmw(dev, MT_WF_RMAC_RMCR,
(MT_WF_RMAC_RMCR_SMPS_MODE |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 0a0115861b51..5e31d7da96fc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1072,7 +1072,7 @@ out:
case MT_PHY_TYPE_HT:
final_rate_flags |= IEEE80211_TX_RC_MCS;
final_rate &= GENMASK(5, 0);
- if (i > 15)
+ if (final_rate > 15)
return false;
break;
default:
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
index b10775ed92e6..cc0fe0933b2d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/module.h>
#include "mt7603.h"
+#include "mac.h"
#include "eeprom.h"
static int
@@ -386,6 +387,15 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps)
}
static void
+mt7603_ps_set_more_data(struct sk_buff *skb)
+{
+ struct ieee80211_hdr *hdr;
+
+ hdr = (struct ieee80211_hdr *) &skb->data[MT_TXD_SIZE];
+ hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+}
+
+static void
mt7603_release_buffered_frames(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
u16 tids, int nframes,
@@ -399,6 +409,8 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw,
__skb_queue_head_init(&list);
+ mt7603_wtbl_set_ps(dev, msta, false);
+
spin_lock_bh(&dev->ps_lock);
skb_queue_walk_safe(&msta->psq, skb, tmp) {
if (!nframes)
@@ -409,11 +421,15 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw,
skb_set_queue_mapping(skb, MT_TXQ_PSD);
__skb_unlink(skb, &msta->psq);
+ mt7603_ps_set_more_data(skb);
__skb_queue_tail(&list, skb);
nframes--;
}
spin_unlock_bh(&dev->ps_lock);
+ if (!skb_queue_empty(&list))
+ ieee80211_sta_eosp(sta);
+
mt7603_ps_tx_list(dev, &list);
if (nframes)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
index 4b0713f1fd5e..d06905ea8cc6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
@@ -433,7 +433,7 @@ int mt7603_mcu_set_channel(struct mt7603_dev *dev)
{
struct cfg80211_chan_def *chandef = &dev->mt76.chandef;
struct ieee80211_hw *hw = mt76_hw(dev);
- int n_chains = __sw_hweight8(dev->mt76.antenna_mask);
+ int n_chains = hweight8(dev->mt76.antenna_mask);
struct {
u8 control_chan;
u8 center_chan;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c
index e13fea80d970..b920be1f5718 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c
@@ -23,9 +23,9 @@ mt76_wmac_probe(struct platform_device *pdev)
}
mem_base = devm_ioremap_resource(&pdev->dev, res);
- if (!mem_base) {
+ if (IS_ERR(mem_base)) {
dev_err(&pdev->dev, "Failed to get memory resource\n");
- return -EINVAL;
+ return PTR_ERR(mem_base);
}
mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt7603_ops,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h
index 0290ba5869a5..736f81752b5b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h
@@ -46,7 +46,7 @@ static const struct mt76_reg_pair common_mac_reg_table[] = {
{ MT_MM20_PROT_CFG, 0x01742004 },
{ MT_MM40_PROT_CFG, 0x03f42084 },
{ MT_TXOP_CTRL_CFG, 0x0000583f },
- { MT_TX_RTS_CFG, 0x00092b20 },
+ { MT_TX_RTS_CFG, 0x00ffff20 },
{ MT_EXP_ACK_TIME, 0x002400ca },
{ MT_TXOP_HLDR_ET, 0x00000002 },
{ MT_XIFS_TIME_CFG, 0x33a41010 },
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
index 91718647da02..e5a06f74a6f7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
@@ -229,7 +229,7 @@ static int mt76x0u_probe(struct usb_interface *usb_intf,
struct usb_device *usb_dev = interface_to_usbdev(usb_intf);
struct mt76x02_dev *dev;
struct mt76_dev *mdev;
- u32 asic_rev, mac_rev;
+ u32 mac_rev;
int ret;
mdev = mt76_alloc_device(&usb_intf->dev, sizeof(*dev), &mt76x0u_ops,
@@ -262,10 +262,14 @@ static int mt76x0u_probe(struct usb_interface *usb_intf,
goto err;
}
- asic_rev = mt76_rr(dev, MT_ASIC_VERSION);
+ mdev->rev = mt76_rr(dev, MT_ASIC_VERSION);
mac_rev = mt76_rr(dev, MT_MAC_CSR0);
dev_info(mdev->dev, "ASIC revision: %08x MAC revision: %08x\n",
- asic_rev, mac_rev);
+ mdev->rev, mac_rev);
+ if (!is_mt76x0(dev)) {
+ ret = -ENODEV;
+ goto err;
+ }
/* Note: vendor driver skips this check for MT76X0U */
if (!(mt76_rr(dev, MT_EFUSE_CTRL) & MT_EFUSE_CTRL_SEL))
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h
index 6915cce5def9..07061eb4d1e1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h
@@ -51,6 +51,7 @@ struct mt76x02_calibration {
u16 false_cca;
s8 avg_rssi_all;
s8 agc_gain_adjust;
+ s8 agc_lowest_gain;
s8 low_gain;
s8 temp_vco;
@@ -114,8 +115,11 @@ struct mt76x02_dev {
struct mt76x02_dfs_pattern_detector dfs_pd;
/* edcca monitor */
+ unsigned long ed_trigger_timeout;
bool ed_tx_blocked;
bool ed_monitor;
+ u8 ed_monitor_enabled;
+ u8 ed_monitor_learning;
u8 ed_trigger;
u8 ed_silent;
ktime_t ed_time;
@@ -188,6 +192,13 @@ void mt76x02_mac_start(struct mt76x02_dev *dev);
void mt76x02_init_debugfs(struct mt76x02_dev *dev);
+static inline bool is_mt76x0(struct mt76x02_dev *dev)
+{
+ return mt76_chip(&dev->mt76) == 0x7610 ||
+ mt76_chip(&dev->mt76) == 0x7630 ||
+ mt76_chip(&dev->mt76) == 0x7650;
+}
+
static inline bool is_mt76x2(struct mt76x02_dev *dev)
{
return mt76_chip(&dev->mt76) == 0x7612 ||
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c
index 7580c5c986ff..b1d6fd4861e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c
@@ -116,6 +116,32 @@ static int read_agc(struct seq_file *file, void *data)
return 0;
}
+static int
+mt76_edcca_set(void *data, u64 val)
+{
+ struct mt76x02_dev *dev = data;
+ enum nl80211_dfs_regions region = dev->dfs_pd.region;
+
+ dev->ed_monitor_enabled = !!val;
+ dev->ed_monitor = dev->ed_monitor_enabled &&
+ region == NL80211_DFS_ETSI;
+ mt76x02_edcca_init(dev, true);
+
+ return 0;
+}
+
+static int
+mt76_edcca_get(void *data, u64 *val)
+{
+ struct mt76x02_dev *dev = data;
+
+ *val = dev->ed_monitor_enabled;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_edcca, mt76_edcca_get, mt76_edcca_set,
+ "%lld\n");
+
void mt76x02_init_debugfs(struct mt76x02_dev *dev)
{
struct dentry *dir;
@@ -127,6 +153,7 @@ void mt76x02_init_debugfs(struct mt76x02_dev *dev)
debugfs_create_u8("temperature", 0400, dir, &dev->cal.temp);
debugfs_create_bool("tpc", 0600, dir, &dev->enable_tpc);
+ debugfs_create_file("edcca", 0400, dir, dev, &fops_edcca);
debugfs_create_file("ampdu_stat", 0400, dir, dev, &fops_ampdu_stat);
debugfs_create_file("dfs_stats", 0400, dir, dev, &fops_dfs_stat);
debugfs_create_devm_seqfile(dev->mt76.dev, "txpower", dir,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c
index e4649103efd4..17d12d212d1b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c
@@ -885,7 +885,8 @@ mt76x02_dfs_set_domain(struct mt76x02_dev *dev,
if (dfs_pd->region != region) {
tasklet_disable(&dfs_pd->dfs_tasklet);
- dev->ed_monitor = region == NL80211_DFS_ETSI;
+ dev->ed_monitor = dev->ed_monitor_enabled &&
+ region == NL80211_DFS_ETSI;
mt76x02_edcca_init(dev, true);
dfs_pd->region = region;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 91ff6598eccf..9ed231abe916 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -67,12 +67,39 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx,
}
EXPORT_SYMBOL_GPL(mt76x02_mac_shared_key_setup);
+void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx,
+ struct ieee80211_key_conf *key)
+{
+ enum mt76x02_cipher_type cipher;
+ u8 key_data[32];
+ u32 iv, eiv;
+ u64 pn;
+
+ cipher = mt76x02_mac_get_key_info(key, key_data);
+ iv = mt76_rr(dev, MT_WCID_IV(idx));
+ eiv = mt76_rr(dev, MT_WCID_IV(idx) + 4);
+
+ pn = (u64)eiv << 16;
+ if (cipher == MT_CIPHER_TKIP) {
+ pn |= (iv >> 16) & 0xff;
+ pn |= (iv & 0xff) << 8;
+ } else if (cipher >= MT_CIPHER_AES_CCMP) {
+ pn |= iv & 0xffff;
+ } else {
+ return;
+ }
+
+ atomic64_set(&key->tx_pn, pn);
+}
+
+
int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx,
struct ieee80211_key_conf *key)
{
enum mt76x02_cipher_type cipher;
u8 key_data[32];
u8 iv_data[8];
+ u64 pn;
cipher = mt76x02_mac_get_key_info(key, key_data);
if (cipher == MT_CIPHER_NONE && key)
@@ -85,9 +112,22 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx,
if (key) {
mt76_rmw_field(dev, MT_WCID_ATTR(idx), MT_WCID_ATTR_PAIRWISE,
!!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE));
+
+ pn = atomic64_read(&key->tx_pn);
+
iv_data[3] = key->keyidx << 6;
- if (cipher >= MT_CIPHER_TKIP)
+ if (cipher >= MT_CIPHER_TKIP) {
iv_data[3] |= 0x20;
+ put_unaligned_le32(pn >> 16, &iv_data[4]);
+ }
+
+ if (cipher == MT_CIPHER_TKIP) {
+ iv_data[0] = (pn >> 8) & 0xff;
+ iv_data[1] = (iv_data[0] | 0x20) & 0x7f;
+ iv_data[2] = pn & 0xff;
+ } else if (cipher >= MT_CIPHER_AES_CCMP) {
+ put_unaligned_le16((pn & 0xffff), &iv_data[0]);
+ }
}
mt76_wr_copy(dev, MT_WCID_IV(idx), iv_data, sizeof(iv_data));
@@ -920,6 +960,7 @@ void mt76x02_edcca_init(struct mt76x02_dev *dev, bool enable)
}
}
mt76x02_edcca_tx_enable(dev, true);
+ dev->ed_monitor_learning = true;
/* clear previous CCA timer value */
mt76_rr(dev, MT_ED_CCA_TIMER);
@@ -929,6 +970,10 @@ EXPORT_SYMBOL_GPL(mt76x02_edcca_init);
#define MT_EDCCA_TH 92
#define MT_EDCCA_BLOCK_TH 2
+#define MT_EDCCA_LEARN_TH 50
+#define MT_EDCCA_LEARN_CCA 180
+#define MT_EDCCA_LEARN_TIMEOUT (20 * HZ)
+
static void mt76x02_edcca_check(struct mt76x02_dev *dev)
{
ktime_t cur_time;
@@ -951,11 +996,23 @@ static void mt76x02_edcca_check(struct mt76x02_dev *dev)
dev->ed_trigger = 0;
}
- if (dev->ed_trigger > MT_EDCCA_BLOCK_TH &&
- !dev->ed_tx_blocked)
+ if (dev->cal.agc_lowest_gain &&
+ dev->cal.false_cca > MT_EDCCA_LEARN_CCA &&
+ dev->ed_trigger > MT_EDCCA_LEARN_TH) {
+ dev->ed_monitor_learning = false;
+ dev->ed_trigger_timeout = jiffies + 20 * HZ;
+ } else if (!dev->ed_monitor_learning &&
+ time_is_after_jiffies(dev->ed_trigger_timeout)) {
+ dev->ed_monitor_learning = true;
+ mt76x02_edcca_tx_enable(dev, true);
+ }
+
+ if (dev->ed_monitor_learning)
+ return;
+
+ if (dev->ed_trigger > MT_EDCCA_BLOCK_TH && !dev->ed_tx_blocked)
mt76x02_edcca_tx_enable(dev, false);
- else if (dev->ed_silent > MT_EDCCA_BLOCK_TH &&
- dev->ed_tx_blocked)
+ else if (dev->ed_silent > MT_EDCCA_BLOCK_TH && dev->ed_tx_blocked)
mt76x02_edcca_tx_enable(dev, true);
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h
index 6b1f25d2f64c..caeeef96c42f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h
@@ -177,6 +177,8 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx,
u8 key_idx, struct ieee80211_key_conf *key);
int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx,
struct ieee80211_key_conf *key);
+void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx,
+ struct ieee80211_key_conf *key);
void mt76x02_mac_wcid_setup(struct mt76x02_dev *dev, u8 idx, u8 vif_idx,
u8 *mac);
void mt76x02_mac_wcid_set_drop(struct mt76x02_dev *dev, u8 idx, bool drop);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 1229f19f2b02..daaed1220147 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -19,6 +19,7 @@
#include <linux/irq.h>
#include "mt76x02.h"
+#include "mt76x02_mcu.h"
#include "mt76x02_trace.h"
struct beacon_bc_data {
@@ -418,9 +419,66 @@ static bool mt76x02_tx_hang(struct mt76x02_dev *dev)
return i < 4;
}
+static void mt76x02_key_sync(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key, void *data)
+{
+ struct mt76x02_dev *dev = hw->priv;
+ struct mt76_wcid *wcid;
+
+ if (!sta)
+ return;
+
+ wcid = (struct mt76_wcid *) sta->drv_priv;
+
+ if (wcid->hw_key_idx != key->keyidx || wcid->sw_iv)
+ return;
+
+ mt76x02_mac_wcid_sync_pn(dev, wcid->idx, key);
+}
+
+static void mt76x02_reset_state(struct mt76x02_dev *dev)
+{
+ int i;
+
+ lockdep_assert_held(&dev->mt76.mutex);
+
+ clear_bit(MT76_STATE_RUNNING, &dev->mt76.state);
+
+ rcu_read_lock();
+ ieee80211_iter_keys_rcu(dev->mt76.hw, NULL, mt76x02_key_sync, NULL);
+ rcu_read_unlock();
+
+ for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid); i++) {
+ struct ieee80211_sta *sta;
+ struct ieee80211_vif *vif;
+ struct mt76x02_sta *msta;
+ struct mt76_wcid *wcid;
+ void *priv;
+
+ wcid = rcu_dereference_protected(dev->mt76.wcid[i],
+ lockdep_is_held(&dev->mt76.mutex));
+ if (!wcid)
+ continue;
+
+ priv = msta = container_of(wcid, struct mt76x02_sta, wcid);
+ sta = container_of(priv, struct ieee80211_sta, drv_priv);
+
+ priv = msta->vif;
+ vif = container_of(priv, struct ieee80211_vif, drv_priv);
+
+ __mt76_sta_remove(&dev->mt76, vif, sta);
+ memset(msta, 0, sizeof(*msta));
+ }
+
+ dev->vif_mask = 0;
+ dev->beacon_mask = 0;
+}
+
static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
{
u32 mask = dev->mt76.mmio.irqmask;
+ bool restart = dev->mt76.mcu_ops->mcu_restart;
int i;
ieee80211_stop_queues(dev->mt76.hw);
@@ -434,6 +492,9 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
mutex_lock(&dev->mt76.mutex);
+ if (restart)
+ mt76x02_reset_state(dev);
+
if (dev->beacon_mask)
mt76_clear(dev, MT_BEACON_TIME_CFG,
MT_BEACON_TIME_CFG_BEACON_TX |
@@ -452,20 +513,21 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
/* let fw reset DMA */
mt76_set(dev, 0x734, 0x3);
+ if (restart)
+ dev->mt76.mcu_ops->mcu_restart(&dev->mt76);
+
for (i = 0; i < ARRAY_SIZE(dev->mt76.q_tx); i++)
mt76_queue_tx_cleanup(dev, i, true);
for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++)
mt76_queue_rx_reset(dev, i);
- mt76_wr(dev, MT_MAC_SYS_CTRL,
- MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX);
- mt76_set(dev, MT_WPDMA_GLO_CFG,
- MT_WPDMA_GLO_CFG_TX_DMA_EN | MT_WPDMA_GLO_CFG_RX_DMA_EN);
+ mt76x02_mac_start(dev);
+
if (dev->ed_monitor)
mt76_set(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN);
- if (dev->beacon_mask)
+ if (dev->beacon_mask && !restart)
mt76_set(dev, MT_BEACON_TIME_CFG,
MT_BEACON_TIME_CFG_BEACON_TX |
MT_BEACON_TIME_CFG_TBTT_EN);
@@ -486,9 +548,13 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
napi_schedule(&dev->mt76.napi[i]);
}
- ieee80211_wake_queues(dev->mt76.hw);
-
- mt76_txq_schedule_all(&dev->mt76);
+ if (restart) {
+ mt76x02_mcu_function_select(dev, Q_SELECT, 1);
+ ieee80211_restart_hw(dev->mt76.hw);
+ } else {
+ ieee80211_wake_queues(dev->mt76.hw);
+ mt76_txq_schedule_all(&dev->mt76);
+ }
}
static void mt76x02_check_tx_hang(struct mt76x02_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c
index a020c757ba5c..a54b63a96eae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c
@@ -194,6 +194,8 @@ bool mt76x02_phy_adjust_vga_gain(struct mt76x02_dev *dev)
ret = true;
}
+ dev->cal.agc_lowest_gain = dev->cal.agc_gain_adjust >= limit;
+
return ret;
}
EXPORT_SYMBOL_GPL(mt76x02_phy_adjust_vga_gain);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
index 43f07461c8d3..6fb52b596d42 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
@@ -85,8 +85,9 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data,
mt76x02_insert_hdr_pad(skb);
- txwi = skb_push(skb, sizeof(struct mt76x02_txwi));
+ txwi = (struct mt76x02_txwi *)(skb->data - sizeof(struct mt76x02_txwi));
mt76x02_mac_write_txwi(dev, txwi, skb, wcid, sta, len);
+ skb_push(skb, sizeof(struct mt76x02_txwi));
pid = mt76_tx_status_skb_add(mdev, wcid, skb);
txwi->pktid = pid;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index a48c261b0c63..cd072ac614f7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -237,6 +237,8 @@ int mt76x02_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv;
int idx = 0;
+ memset(msta, 0, sizeof(*msta));
+
idx = mt76_wcid_alloc(dev->mt76.wcid_mask, ARRAY_SIZE(dev->mt76.wcid));
if (idx < 0)
return -ENOSPC;
@@ -274,6 +276,8 @@ mt76x02_vif_init(struct mt76x02_dev *dev, struct ieee80211_vif *vif,
struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv;
struct mt76_txq *mtxq;
+ memset(mvif, 0, sizeof(*mvif));
+
mvif->idx = idx;
mvif->group_wcid.idx = MT_VIF_WCID(idx);
mvif->group_wcid.hw_key_idx = -1;
@@ -289,6 +293,12 @@ mt76x02_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
struct mt76x02_dev *dev = hw->priv;
unsigned int idx = 0;
+ /* Allow to change address in HW if we create first interface. */
+ if (!dev->vif_mask &&
+ (((vif->addr[0] ^ dev->mt76.macaddr[0]) & ~GENMASK(4, 1)) ||
+ memcmp(vif->addr + 1, dev->mt76.macaddr + 1, ETH_ALEN - 1)))
+ mt76x02_mac_setaddr(dev, vif->addr);
+
if (vif->addr[0] & BIT(1))
idx = 1 + (((dev->mt76.macaddr[0] ^ vif->addr[0]) >> 2) & 7);
@@ -311,10 +321,6 @@ mt76x02_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
if (dev->vif_mask & BIT(idx))
return -EBUSY;
- /* Allow to change address in HW if we create first interface. */
- if (!dev->vif_mask && !ether_addr_equal(dev->mt76.macaddr, vif->addr))
- mt76x02_mac_setaddr(dev, vif->addr);
-
dev->vif_mask |= BIT(idx);
mt76x02_vif_init(dev, vif, idx);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c
index f8534362e2c8..a30ef2c5a9db 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c
@@ -106,7 +106,7 @@ void mt76_write_mac_initvals(struct mt76x02_dev *dev)
{ MT_TX_SW_CFG1, 0x00010000 },
{ MT_TX_SW_CFG2, 0x00000000 },
{ MT_TXOP_CTRL_CFG, 0x0400583f },
- { MT_TX_RTS_CFG, 0x00100020 },
+ { MT_TX_RTS_CFG, 0x00ffff20 },
{ MT_TX_TIMEOUT_CFG, 0x000a2290 },
{ MT_TX_RETRY_CFG, 0x47f01f0f },
{ MT_EXP_ACK_TIME, 0x002c00dc },
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h
index 6c619f1c65c9..d7abe3d73bad 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h
@@ -71,6 +71,7 @@ int mt76x2_mcu_load_cr(struct mt76x02_dev *dev, u8 type, u8 temp_level,
void mt76x2_cleanup(struct mt76x02_dev *dev);
+int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard);
void mt76x2_reset_wlan(struct mt76x02_dev *dev, bool enable);
void mt76x2_init_txpower(struct mt76x02_dev *dev,
struct ieee80211_supported_band *sband);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
index 984d9c4c2e1a..d3927a13e92e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
@@ -77,7 +77,7 @@ mt76x2_fixup_xtal(struct mt76x02_dev *dev)
}
}
-static int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard)
+int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard)
{
const u8 *macaddr = dev->mt76.macaddr;
u32 val;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c
index 03e24ae7f66c..605dc66ae83b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c
@@ -165,9 +165,30 @@ error:
return -ENOENT;
}
+static int
+mt76pci_mcu_restart(struct mt76_dev *mdev)
+{
+ struct mt76x02_dev *dev;
+ int ret;
+
+ dev = container_of(mdev, struct mt76x02_dev, mt76);
+
+ mt76x02_mcu_cleanup(dev);
+ mt76x2_mac_reset(dev, true);
+
+ ret = mt76pci_load_firmware(dev);
+ if (ret)
+ return ret;
+
+ mt76_wr(dev, MT_WPDMA_RST_IDX, ~0);
+
+ return 0;
+}
+
int mt76x2_mcu_init(struct mt76x02_dev *dev)
{
static const struct mt76_mcu_ops mt76x2_mcu_ops = {
+ .mcu_restart = mt76pci_mcu_restart,
.mcu_send_msg = mt76x02_mcu_msg_send,
};
int ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c
index 1848e8ab2e21..769a9b972044 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c
@@ -260,10 +260,15 @@ mt76x2_phy_set_gain_val(struct mt76x02_dev *dev)
gain_val[0] = dev->cal.agc_gain_cur[0] - dev->cal.agc_gain_adjust;
gain_val[1] = dev->cal.agc_gain_cur[1] - dev->cal.agc_gain_adjust;
- if (dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40)
+ val = 0x1836 << 16;
+ if (!mt76x2_has_ext_lna(dev) &&
+ dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40)
val = 0x1e42 << 16;
- else
- val = 0x1836 << 16;
+
+ if (mt76x2_has_ext_lna(dev) &&
+ dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ &&
+ dev->mt76.chandef.width < NL80211_CHAN_WIDTH_40)
+ val = 0x0f36 << 16;
val |= 0xf8;
@@ -280,6 +285,7 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev)
{
u8 *gain = dev->cal.agc_gain_init;
u8 low_gain_delta, gain_delta;
+ u32 agc_35, agc_37;
bool gain_change;
int low_gain;
u32 val;
@@ -318,6 +324,16 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev)
else
low_gain_delta = 14;
+ agc_37 = 0x2121262c;
+ if (dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ)
+ agc_35 = 0x11111516;
+ else if (low_gain == 2)
+ agc_35 = agc_37 = 0x08080808;
+ else if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80)
+ agc_35 = 0x10101014;
+ else
+ agc_35 = 0x11111116;
+
if (low_gain == 2) {
mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a990);
mt76_wr(dev, MT_BBP(AGC, 35), 0x08080808);
@@ -326,15 +342,13 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev)
dev->cal.agc_gain_adjust = 0;
} else {
mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a991);
- if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80)
- mt76_wr(dev, MT_BBP(AGC, 35), 0x10101014);
- else
- mt76_wr(dev, MT_BBP(AGC, 35), 0x11111116);
- mt76_wr(dev, MT_BBP(AGC, 37), 0x2121262C);
gain_delta = 0;
dev->cal.agc_gain_adjust = low_gain_delta;
}
+ mt76_wr(dev, MT_BBP(AGC, 35), agc_35);
+ mt76_wr(dev, MT_BBP(AGC, 37), agc_37);
+
dev->cal.agc_gain_cur[0] = gain[0] - gain_delta;
dev->cal.agc_gain_cur[1] = gain[1] - gain_delta;
mt76x2_phy_set_gain_val(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
index ddb6b2c48e01..ac0f13d46299 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -21,11 +21,10 @@
#include "mt76x2u.h"
static const struct usb_device_id mt76x2u_device_table[] = {
- { USB_DEVICE(0x0e8d, 0x7612) }, /* Alfa AWUS036ACM */
{ USB_DEVICE(0x0b05, 0x1833) }, /* Asus USB-AC54 */
{ USB_DEVICE(0x0b05, 0x17eb) }, /* Asus USB-AC55 */
{ USB_DEVICE(0x0b05, 0x180b) }, /* Asus USB-N53 B1 */
- { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USB-AC1200 */
+ { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USBAC1200 - Alfa AWUS036ACM */
{ USB_DEVICE(0x057c, 0x8503) }, /* Avm FRITZ!WLAN AC860 */
{ USB_DEVICE(0x7392, 0xb711) }, /* Edimax EW 7722 UAC */
{ USB_DEVICE(0x0846, 0x9053) }, /* Netgear A6210 */
@@ -66,6 +65,10 @@ static int mt76x2u_probe(struct usb_interface *intf,
mdev->rev = mt76_rr(dev, MT_ASIC_VERSION);
dev_info(mdev->dev, "ASIC revision: %08x\n", mdev->rev);
+ if (!is_mt76x2(dev)) {
+ err = -ENODEV;
+ goto err;
+ }
err = mt76x2u_register_device(dev);
if (err < 0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c
index 5e84b4535cb1..3b82345756ea 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c
@@ -93,7 +93,6 @@ int mt76x2u_mac_reset(struct mt76x02_dev *dev)
mt76_wr(dev, MT_TX_LINK_CFG, 0x1020);
mt76_wr(dev, MT_AUTO_RSP_CFG, 0x13);
mt76_wr(dev, MT_MAX_LEN_CFG, 0x2f00);
- mt76_wr(dev, MT_TX_RTS_CFG, 0x92b20);
mt76_wr(dev, MT_WMM_AIFSN, 0x2273);
mt76_wr(dev, MT_WMM_CWMIN, 0x2344);
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 5a349fe3e576..2585df512335 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -289,8 +289,11 @@ mt76_tx(struct mt76_dev *dev, struct ieee80211_sta *sta,
dev->queue_ops->tx_queue_skb(dev, q, skb, wcid, sta);
dev->queue_ops->kick(dev, q);
- if (q->queued > q->ndesc - 8)
+ if (q->queued > q->ndesc - 8 && !q->stopped) {
ieee80211_stop_queue(dev->hw, skb_get_queue_mapping(skb));
+ q->stopped = true;
+ }
+
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(mt76_tx);
@@ -374,7 +377,10 @@ mt76_release_buffered_frames(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
if (last_skb) {
mt76_queue_ps_skb(dev, sta, last_skb, true);
dev->queue_ops->kick(dev, hwq);
+ } else {
+ ieee80211_sta_eosp(sta);
}
+
spin_unlock_bh(&hwq->lock);
}
EXPORT_SYMBOL_GPL(mt76_release_buffered_frames);
@@ -577,6 +583,9 @@ void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
struct mt76_txq *mtxq = (struct mt76_txq *) txq->drv_priv;
struct mt76_queue *hwq = mtxq->hwq;
+ if (!test_bit(MT76_STATE_RUNNING, &dev->state))
+ return;
+
spin_lock_bh(&hwq->lock);
if (list_empty(&mtxq->list))
list_add_tail(&mtxq->list, &hwq->swq);
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index ae6ada370597..4c1abd492405 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -655,7 +655,11 @@ static void mt76u_tx_tasklet(unsigned long data)
spin_lock_bh(&q->lock);
}
mt76_txq_schedule(dev, q);
- wake = i < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8;
+
+ wake = q->stopped && q->queued < q->ndesc - 8;
+ if (wake)
+ q->stopped = false;
+
if (!q->queued)
wake_up(&dev->tx_wait);
diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
index d8b7863f7926..6ae7f14dc9bf 100644
--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
+++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
@@ -303,6 +303,10 @@ static int mt7601u_probe(struct usb_interface *usb_intf,
mac_rev = mt7601u_rr(dev, MT_MAC_CSR0);
dev_info(dev->dev, "ASIC revision: %08x MAC revision: %08x\n",
asic_rev, mac_rev);
+ if ((asic_rev >> 16) != 0x7601) {
+ ret = -ENODEV;
+ goto err;
+ }
/* Note: vendor driver skips this check for MT7601U */
if (!(mt7601u_rr(dev, MT_EFUSE_CTRL) & MT_EFUSE_CTRL_SEL))
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 2839bb70badf..f0716f6ce41f 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -404,15 +404,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns)
{
- enum nvme_ana_state old;
-
mutex_lock(&ns->head->lock);
- old = ns->ana_state;
ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
- if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old))
+ if (nvme_state_is_live(ns->ana_state))
nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e7e08889865e..68c49dd67210 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -627,7 +627,7 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
return ret;
}
-static inline void nvme_tcp_end_request(struct request *rq, __le16 status)
+static inline void nvme_tcp_end_request(struct request *rq, u16 status)
{
union nvme_result res = {};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 2d73b66e3686..b3e765a95af8 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -509,7 +509,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
ret = nvmet_p2pmem_ns_enable(ns);
if (ret)
- goto out_unlock;
+ goto out_dev_disable;
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
@@ -550,7 +550,7 @@ out_unlock:
out_dev_put:
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
-
+out_dev_disable:
nvmet_ns_dev_disable(ns);
goto out_unlock;
}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 3e43212d3c1c..bc6ebb51b0bf 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -75,11 +75,11 @@ err:
return ret;
}
-static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{
- bv->bv_page = sg_page_iter_page(iter);
- bv->bv_offset = iter->sg->offset;
- bv->bv_len = PAGE_SIZE - iter->sg->offset;
+ bv->bv_page = sg_page(sg);
+ bv->bv_offset = sg->offset;
+ bv->bv_len = sg->length;
}
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
@@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{
- ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
- struct sg_page_iter sg_pg_iter;
+ ssize_t nr_bvec = req->sg_cnt;
unsigned long bv_cnt = 0;
bool is_sync = false;
size_t len = 0, total_len = 0;
ssize_t ret = 0;
loff_t pos;
-
+ int i;
+ struct scatterlist *sg;
if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
is_sync = true;
@@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
}
memset(&req->f.iocb, 0, sizeof(struct kiocb));
- for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
- nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
len += req->f.bvec[bv_cnt].bv_len;
total_len += req->f.bvec[bv_cnt].bv_len;
bv_cnt++;
@@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
static void nvmet_file_execute_rw(struct nvmet_req *req)
{
- ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+ ssize_t nr_bvec = req->sg_cnt;
if (!req->sg_cnt || !nr_bvec) {
nvmet_req_complete(req, 0);
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 1be571c20062..6bad04cbb1d3 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -157,8 +157,12 @@
#define DBG_IRT(x...)
#endif
+#ifdef CONFIG_64BIT
+#define COMPARE_IRTE_ADDR(irte, hpa) ((irte)->dest_iosapic_addr == (hpa))
+#else
#define COMPARE_IRTE_ADDR(irte, hpa) \
- ((irte)->dest_iosapic_addr == F_EXTEND(hpa))
+ ((irte)->dest_iosapic_addr == ((hpa) | 0xffffffff00000000ULL))
+#endif
#define IOSAPIC_REG_SELECT 0x00
#define IOSAPIC_REG_WINDOW 0x10
diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c
index 56dd83a45e55..5484a46dafda 100644
--- a/drivers/parport/daisy.c
+++ b/drivers/parport/daisy.c
@@ -213,12 +213,10 @@ void parport_daisy_fini(struct parport *port)
struct pardevice *parport_open(int devnum, const char *name)
{
struct daisydev *p = topology;
- struct pardev_cb par_cb;
struct parport *port;
struct pardevice *dev;
int daisy;
- memset(&par_cb, 0, sizeof(par_cb));
spin_lock(&topology_lock);
while (p && p->devnum != devnum)
p = p->next;
@@ -232,7 +230,7 @@ struct pardevice *parport_open(int devnum, const char *name)
port = parport_get_port(p->port);
spin_unlock(&topology_lock);
- dev = parport_register_dev_model(port, name, &par_cb, devnum);
+ dev = parport_register_device(port, name, NULL, NULL, NULL, 0, NULL);
parport_put_port(port);
if (!dev)
return NULL;
@@ -482,31 +480,3 @@ static int assign_addrs(struct parport *port)
kfree(deviceid);
return detected;
}
-
-static int daisy_drv_probe(struct pardevice *par_dev)
-{
- struct device_driver *drv = par_dev->dev.driver;
-
- if (strcmp(drv->name, "daisy_drv"))
- return -ENODEV;
- if (strcmp(par_dev->name, daisy_dev_name))
- return -ENODEV;
-
- return 0;
-}
-
-static struct parport_driver daisy_driver = {
- .name = "daisy_drv",
- .probe = daisy_drv_probe,
- .devmodel = true,
-};
-
-int daisy_drv_init(void)
-{
- return parport_register_driver(&daisy_driver);
-}
-
-void daisy_drv_exit(void)
-{
- parport_unregister_driver(&daisy_driver);
-}
diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c
index e5e6a463a941..e035174ba205 100644
--- a/drivers/parport/probe.c
+++ b/drivers/parport/probe.c
@@ -257,7 +257,7 @@ static ssize_t parport_read_device_id (struct parport *port, char *buffer,
ssize_t parport_device_id (int devnum, char *buffer, size_t count)
{
ssize_t retval = -ENXIO;
- struct pardevice *dev = parport_open(devnum, daisy_dev_name);
+ struct pardevice *dev = parport_open (devnum, "Device ID probe");
if (!dev)
return -ENXIO;
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index 0171b8dbcdcd..5dc53d420ca8 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -137,19 +137,11 @@ static struct bus_type parport_bus_type = {
int parport_bus_init(void)
{
- int retval;
-
- retval = bus_register(&parport_bus_type);
- if (retval)
- return retval;
- daisy_drv_init();
-
- return 0;
+ return bus_register(&parport_bus_type);
}
void parport_bus_exit(void)
{
- daisy_drv_exit();
bus_unregister(&parport_bus_type);
}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 224d88634115..d994839a3e24 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -273,6 +273,7 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
enum pcie_link_width *width);
void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
+void pcie_report_downtraining(struct pci_dev *dev);
/* Single Root I/O Virtualization */
struct pci_sriov {
diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c
index d2eae3b7cc0f..4fa9e3523ee1 100644
--- a/drivers/pci/pcie/bw_notification.c
+++ b/drivers/pci/pcie/bw_notification.c
@@ -30,6 +30,8 @@ static void pcie_enable_link_bandwidth_notification(struct pci_dev *dev)
{
u16 lnk_ctl;
+ pcie_capability_write_word(dev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
+
pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl);
lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
@@ -44,11 +46,10 @@ static void pcie_disable_link_bandwidth_notification(struct pci_dev *dev)
pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
}
-static irqreturn_t pcie_bw_notification_handler(int irq, void *context)
+static irqreturn_t pcie_bw_notification_irq(int irq, void *context)
{
struct pcie_device *srv = context;
struct pci_dev *port = srv->port;
- struct pci_dev *dev;
u16 link_status, events;
int ret;
@@ -58,17 +59,26 @@ static irqreturn_t pcie_bw_notification_handler(int irq, void *context)
if (ret != PCIBIOS_SUCCESSFUL || !events)
return IRQ_NONE;
+ pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
+ pcie_update_link_speed(port->subordinate, link_status);
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t pcie_bw_notification_handler(int irq, void *context)
+{
+ struct pcie_device *srv = context;
+ struct pci_dev *port = srv->port;
+ struct pci_dev *dev;
+
/*
* Print status from downstream devices, not this root port or
* downstream switch port.
*/
down_read(&pci_bus_sem);
list_for_each_entry(dev, &port->subordinate->devices, bus_list)
- __pcie_print_link_status(dev, false);
+ pcie_report_downtraining(dev);
up_read(&pci_bus_sem);
- pcie_update_link_speed(port->subordinate, link_status);
- pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
return IRQ_HANDLED;
}
@@ -80,7 +90,8 @@ static int pcie_bandwidth_notification_probe(struct pcie_device *srv)
if (!pcie_link_bandwidth_notification_supported(srv->port))
return -ENODEV;
- ret = request_threaded_irq(srv->irq, NULL, pcie_bw_notification_handler,
+ ret = request_threaded_irq(srv->irq, pcie_bw_notification_irq,
+ pcie_bw_notification_handler,
IRQF_SHARED, "PCIe BW notif", srv);
if (ret)
return ret;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2ec0df04e0dc..7e12d0163863 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2388,7 +2388,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
return dev;
}
-static void pcie_report_downtraining(struct pci_dev *dev)
+void pcie_report_downtraining(struct pci_dev *dev)
{
if (!pci_is_pcie(dev))
return;
diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c
index 5163097b43df..4bbd9ede38c8 100644
--- a/drivers/phy/allwinner/phy-sun4i-usb.c
+++ b/drivers/phy/allwinner/phy-sun4i-usb.c
@@ -485,8 +485,11 @@ static int sun4i_usb_phy_set_mode(struct phy *_phy,
struct sun4i_usb_phy_data *data = to_sun4i_usb_phy_data(phy);
int new_mode;
- if (phy->index != 0)
+ if (phy->index != 0) {
+ if (mode == PHY_MODE_USB_HOST)
+ return 0;
return -EINVAL;
+ }
switch (mode) {
case PHY_MODE_USB_HOST:
diff --git a/drivers/reset/reset-meson-audio-arb.c b/drivers/reset/reset-meson-audio-arb.c
index 91751617b37a..c53a2185a039 100644
--- a/drivers/reset/reset-meson-audio-arb.c
+++ b/drivers/reset/reset-meson-audio-arb.c
@@ -130,6 +130,7 @@ static int meson_audio_arb_probe(struct platform_device *pdev)
arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits);
arb->rstc.ops = &meson_audio_arb_rstc_ops;
arb->rstc.of_node = dev->of_node;
+ arb->rstc.owner = THIS_MODULE;
/*
* Enable general :
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index a71734c41693..f933c06bff4f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -667,9 +667,9 @@ config RTC_DRV_S5M
will be called rtc-s5m.
config RTC_DRV_SD3078
- tristate "ZXW Crystal SD3078"
+ tristate "ZXW Shenzhen whwave SD3078"
help
- If you say yes here you get support for the ZXW Crystal
+ If you say yes here you get support for the ZXW Shenzhen whwave
SD3078 RTC chips.
This driver can also be built as a module. If so, the module
diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index e5444296075e..4d6bf9304ceb 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -298,7 +298,7 @@ static int cros_ec_rtc_suspend(struct device *dev)
struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);
if (device_may_wakeup(dev))
- enable_irq_wake(cros_ec_rtc->cros_ec->irq);
+ return enable_irq_wake(cros_ec_rtc->cros_ec->irq);
return 0;
}
@@ -309,7 +309,7 @@ static int cros_ec_rtc_resume(struct device *dev)
struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);
if (device_may_wakeup(dev))
- disable_irq_wake(cros_ec_rtc->cros_ec->irq);
+ return disable_irq_wake(cros_ec_rtc->cros_ec->irq);
return 0;
}
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index b4e054c64bad..69b54e5556c0 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -480,6 +480,13 @@ static int da9063_rtc_probe(struct platform_device *pdev)
da9063_data_to_tm(data, &rtc->alarm_time, rtc);
rtc->rtc_sync = false;
+ /*
+ * TODO: some models have alarms on a minute boundary but still support
+ * real hardware interrupts. Add this once the core supports it.
+ */
+ if (config->rtc_data_start != RTC_SEC)
+ rtc->rtc_dev->uie_unsupported = 1;
+
irq_alarm = platform_get_irq_byname(pdev, "ALARM");
ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL,
da9063_alarm_event,
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index d417b203cbc5..1d3de2a3d1a4 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -374,7 +374,7 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
{
unsigned int byte;
- int value = 0xff; /* return 0xff for ignored values */
+ int value = -1; /* return -1 for ignored values */
byte = readb(rtc->regbase + reg_off);
if (byte & AR_ENB) {
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 4159c63a5fd2..a835b31aad99 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -24,6 +24,7 @@
#include <asm/crw.h>
#include <asm/isc.h>
#include <asm/ebcdic.h>
+#include <asm/ap.h>
#include "css.h"
#include "cio.h"
@@ -586,6 +587,15 @@ static void chsc_process_sei_scm_avail(struct chsc_sei_nt0_area *sei_area)
" failed (rc=%d).\n", ret);
}
+static void chsc_process_sei_ap_cfg_chg(struct chsc_sei_nt0_area *sei_area)
+{
+ CIO_CRW_EVENT(3, "chsc: ap config changed\n");
+ if (sei_area->rs != 5)
+ return;
+
+ ap_bus_cfg_chg();
+}
+
static void chsc_process_sei_nt2(struct chsc_sei_nt2_area *sei_area)
{
switch (sei_area->cc) {
@@ -612,6 +622,9 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area)
case 2: /* i/o resource accessibility */
chsc_process_sei_res_acc(sei_area);
break;
+ case 3: /* ap config changed */
+ chsc_process_sei_ap_cfg_chg(sei_area);
+ break;
case 7: /* channel-path-availability information */
chsc_process_sei_chp_avail(sei_area);
break;
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index a10cec0e86eb..0b3b9de45c60 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -72,20 +72,24 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
{
struct vfio_ccw_private *private;
struct irb *irb;
+ bool is_final;
private = container_of(work, struct vfio_ccw_private, io_work);
irb = &private->irb;
+ is_final = !(scsw_actl(&irb->scsw) &
+ (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
if (scsw_is_solicited(&irb->scsw)) {
cp_update_scsw(&private->cp, &irb->scsw);
- cp_free(&private->cp);
+ if (is_final)
+ cp_free(&private->cp);
}
memcpy(private->io_region->irb_area, irb, sizeof(*irb));
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
- if (private->mdev)
+ if (private->mdev && is_final)
private->state = VFIO_CCW_STATE_IDLE;
}
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index e15816ff1265..1546389d71db 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -810,11 +810,18 @@ static int ap_device_remove(struct device *dev)
struct ap_device *ap_dev = to_ap_dev(dev);
struct ap_driver *ap_drv = ap_dev->drv;
+ /* prepare ap queue device removal */
if (is_queue_dev(dev))
- ap_queue_remove(to_ap_queue(dev));
+ ap_queue_prepare_remove(to_ap_queue(dev));
+
+ /* driver's chance to clean up gracefully */
if (ap_drv->remove)
ap_drv->remove(ap_dev);
+ /* now do the ap queue device remove */
+ if (is_queue_dev(dev))
+ ap_queue_remove(to_ap_queue(dev));
+
/* Remove queue/card from list of active queues/cards */
spin_lock_bh(&ap_list_lock);
if (is_card_dev(dev))
@@ -861,6 +868,16 @@ void ap_bus_force_rescan(void)
EXPORT_SYMBOL(ap_bus_force_rescan);
/*
+* A config change has happened, force an ap bus rescan.
+*/
+void ap_bus_cfg_chg(void)
+{
+ AP_DBF(DBF_INFO, "%s config change, forcing bus rescan\n", __func__);
+
+ ap_bus_force_rescan();
+}
+
+/*
* hex2bitmap() - parse hex mask string and set bitmap.
* Valid strings are "0x012345678" with at least one valid hex number.
* Rest of the bitmap to the right is padded with 0. No spaces allowed
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index d0059eae5d94..15a98a673c5c 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -91,6 +91,7 @@ enum ap_state {
AP_STATE_WORKING,
AP_STATE_QUEUE_FULL,
AP_STATE_SUSPEND_WAIT,
+ AP_STATE_REMOVE, /* about to be removed from driver */
AP_STATE_UNBOUND, /* momentary not bound to a driver */
AP_STATE_BORKED, /* broken */
NR_AP_STATES
@@ -252,6 +253,7 @@ void ap_bus_force_rescan(void);
void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg);
struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type);
+void ap_queue_prepare_remove(struct ap_queue *aq);
void ap_queue_remove(struct ap_queue *aq);
void ap_queue_suspend(struct ap_device *ap_dev);
void ap_queue_resume(struct ap_device *ap_dev);
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index ba261210c6da..6a340f2c3556 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -420,6 +420,10 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
[AP_EVENT_POLL] = ap_sm_suspend_read,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
},
+ [AP_STATE_REMOVE] = {
+ [AP_EVENT_POLL] = ap_sm_nop,
+ [AP_EVENT_TIMEOUT] = ap_sm_nop,
+ },
[AP_STATE_UNBOUND] = {
[AP_EVENT_POLL] = ap_sm_nop,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
@@ -740,18 +744,31 @@ void ap_flush_queue(struct ap_queue *aq)
}
EXPORT_SYMBOL(ap_flush_queue);
-void ap_queue_remove(struct ap_queue *aq)
+void ap_queue_prepare_remove(struct ap_queue *aq)
{
- ap_flush_queue(aq);
+ spin_lock_bh(&aq->lock);
+ /* flush queue */
+ __ap_flush_queue(aq);
+ /* set REMOVE state to prevent new messages are queued in */
+ aq->state = AP_STATE_REMOVE;
del_timer_sync(&aq->timeout);
+ spin_unlock_bh(&aq->lock);
+}
- /* reset with zero, also clears irq registration */
+void ap_queue_remove(struct ap_queue *aq)
+{
+ /*
+ * all messages have been flushed and the state is
+ * AP_STATE_REMOVE. Now reset with zero which also
+ * clears the irq registration and move the state
+ * to AP_STATE_UNBOUND to signal that this queue
+ * is not used by any driver currently.
+ */
spin_lock_bh(&aq->lock);
ap_zapq(aq->qid);
aq->state = AP_STATE_UNBOUND;
spin_unlock_bh(&aq->lock);
}
-EXPORT_SYMBOL(ap_queue_remove);
void ap_queue_reinit_state(struct ap_queue *aq)
{
@@ -760,4 +777,3 @@ void ap_queue_reinit_state(struct ap_queue *aq)
ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
spin_unlock_bh(&aq->lock);
}
-EXPORT_SYMBOL(ap_queue_reinit_state);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index eb93c2d27d0a..689c2af7026a 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -586,6 +586,7 @@ static inline bool zcrypt_check_queue(struct ap_perms *perms, int queue)
static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
struct zcrypt_queue *zq,
+ struct module **pmod,
unsigned int weight)
{
if (!zq || !try_module_get(zq->queue->ap_dev.drv->driver.owner))
@@ -595,15 +596,15 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
atomic_add(weight, &zc->load);
atomic_add(weight, &zq->load);
zq->request_count++;
+ *pmod = zq->queue->ap_dev.drv->driver.owner;
return zq;
}
static inline void zcrypt_drop_queue(struct zcrypt_card *zc,
struct zcrypt_queue *zq,
+ struct module *mod,
unsigned int weight)
{
- struct module *mod = zq->queue->ap_dev.drv->driver.owner;
-
zq->request_count--;
atomic_sub(weight, &zc->load);
atomic_sub(weight, &zq->load);
@@ -653,6 +654,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
unsigned int weight, pref_weight;
unsigned int func_code;
int qid = 0, rc = -ENODEV;
+ struct module *mod;
trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO);
@@ -706,7 +708,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
pref_weight = weight;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -718,7 +720,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
rc = pref_zq->ops->rsa_modexpo(pref_zq, mex);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@@ -735,6 +737,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
unsigned int weight, pref_weight;
unsigned int func_code;
int qid = 0, rc = -ENODEV;
+ struct module *mod;
trace_s390_zcrypt_req(crt, TP_ICARSACRT);
@@ -788,7 +791,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
pref_weight = weight;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -800,7 +803,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@@ -819,6 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
unsigned int func_code;
unsigned short *domain;
int qid = 0, rc = -ENODEV;
+ struct module *mod;
trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB);
@@ -865,7 +869,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
pref_weight = weight;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -881,7 +885,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@@ -932,6 +936,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
unsigned int func_code;
struct ap_message ap_msg;
int qid = 0, rc = -ENODEV;
+ struct module *mod;
trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB);
@@ -1000,7 +1005,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
pref_weight = weight;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -1012,7 +1017,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out_free:
@@ -1033,6 +1038,7 @@ static long zcrypt_rng(char *buffer)
struct ap_message ap_msg;
unsigned int domain;
int qid = 0, rc = -ENODEV;
+ struct module *mod;
trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB);
@@ -1064,7 +1070,7 @@ static long zcrypt_rng(char *buffer)
pref_weight = weight;
}
}
- pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
+ pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@@ -1076,7 +1082,7 @@ static long zcrypt_rng(char *buffer)
rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg);
spin_lock(&zcrypt_list_lock);
- zcrypt_drop_queue(pref_zc, pref_zq, weight);
+ zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 197b0f5b63e7..44bd6f04c145 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1150,13 +1150,16 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q,
static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
{
+ struct sk_buff *skb;
+
/* release may never happen from within CQ tasklet scope */
WARN_ON_ONCE(atomic_read(&buf->state) == QETH_QDIO_BUF_IN_CQ);
if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING)
qeth_notify_skbs(buf->q, buf, TX_NOTIFY_GENERALERROR);
- __skb_queue_purge(&buf->skb_list);
+ while ((skb = __skb_dequeue(&buf->skb_list)) != NULL)
+ consume_skb(skb);
}
static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 8efb2e8ff8f4..c3067fd3bd9e 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -629,8 +629,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
} /* else fall through */
QETH_TXQ_STAT_INC(queue, tx_dropped);
- QETH_TXQ_STAT_INC(queue, tx_errors);
- dev_kfree_skb_any(skb);
+ kfree_skb(skb);
netif_wake_queue(dev);
return NETDEV_TX_OK;
}
@@ -645,6 +644,8 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
int rc;
+ qeth_l2_vnicc_set_defaults(card);
+
if (gdev->dev.type == &qeth_generic_devtype) {
rc = qeth_l2_create_device_attributes(&gdev->dev);
if (rc)
@@ -652,8 +653,6 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
}
hash_init(card->mac_htable);
- card->info.hwtrap = 0;
- qeth_l2_vnicc_set_defaults(card);
return 0;
}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 7e68d9d16859..53712cf26406 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2096,8 +2096,7 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
tx_drop:
QETH_TXQ_STAT_INC(queue, tx_dropped);
- QETH_TXQ_STAT_INC(queue, tx_errors);
- dev_kfree_skb_any(skb);
+ kfree_skb(skb);
netif_wake_queue(dev);
return NETDEV_TX_OK;
}
@@ -2253,14 +2252,15 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
int rc;
+ hash_init(card->ip_htable);
+
if (gdev->dev.type == &qeth_generic_devtype) {
rc = qeth_l3_create_device_attributes(&gdev->dev);
if (rc)
return rc;
}
- hash_init(card->ip_htable);
+
hash_init(card->ip_mc_htable);
- card->info.hwtrap = 0;
return 0;
}
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 744a64680d5b..e8fc28dba8df 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -624,6 +624,20 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action)
add_timer(&erp_action->timer);
}
+void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter,
+ int clear, char *dbftag)
+{
+ unsigned long flags;
+ struct zfcp_port *port;
+
+ write_lock_irqsave(&adapter->erp_lock, flags);
+ read_lock(&adapter->port_list_lock);
+ list_for_each_entry(port, &adapter->port_list, list)
+ _zfcp_erp_port_forced_reopen(port, clear, dbftag);
+ read_unlock(&adapter->port_list_lock);
+ write_unlock_irqrestore(&adapter->erp_lock, flags);
+}
+
static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter,
int clear, char *dbftag)
{
@@ -1341,6 +1355,9 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port)
struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev);
int lun_status;
+ if (sdev->sdev_state == SDEV_DEL ||
+ sdev->sdev_state == SDEV_CANCEL)
+ continue;
if (zsdev->port != port)
continue;
/* LUN under port of interest */
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 3fce47b0b21b..c6acca521ffe 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -70,6 +70,8 @@ extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear,
char *dbftag);
extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *);
extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *);
+extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter,
+ int clear, char *dbftag);
extern void zfcp_erp_set_lun_status(struct scsi_device *, u32);
extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32);
extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *);
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index db00b5e3abbe..33eddb02ee30 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -239,10 +239,6 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range,
list_for_each_entry(port, &adapter->port_list, list) {
if ((port->d_id & range) == (ntoh24(page->rscn_fid) & range))
zfcp_fc_test_link(port);
- if (!port->d_id)
- zfcp_erp_port_reopen(port,
- ZFCP_STATUS_COMMON_ERP_FAILED,
- "fcrscn1");
}
read_unlock_irqrestore(&adapter->port_list_lock, flags);
}
@@ -250,6 +246,7 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range,
static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req)
{
struct fsf_status_read_buffer *status_buffer = (void *)fsf_req->data;
+ struct zfcp_adapter *adapter = fsf_req->adapter;
struct fc_els_rscn *head;
struct fc_els_rscn_page *page;
u16 i;
@@ -263,6 +260,22 @@ static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req)
no_entries = be16_to_cpu(head->rscn_plen) /
sizeof(struct fc_els_rscn_page);
+ if (no_entries > 1) {
+ /* handle failed ports */
+ unsigned long flags;
+ struct zfcp_port *port;
+
+ read_lock_irqsave(&adapter->port_list_lock, flags);
+ list_for_each_entry(port, &adapter->port_list, list) {
+ if (port->d_id)
+ continue;
+ zfcp_erp_port_reopen(port,
+ ZFCP_STATUS_COMMON_ERP_FAILED,
+ "fcrscn1");
+ }
+ read_unlock_irqrestore(&adapter->port_list_lock, flags);
+ }
+
for (i = 1; i < no_entries; i++) {
/* skip head and start with 1st element */
page++;
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index f4f6a07c5222..221d0dfb8493 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -368,6 +368,10 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
struct zfcp_adapter *adapter = zfcp_sdev->port->adapter;
int ret = SUCCESS, fc_ret;
+ if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) {
+ zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p");
+ zfcp_erp_wait(adapter);
+ }
zfcp_erp_adapter_reopen(adapter, 0, "schrh_1");
zfcp_erp_wait(adapter);
fc_ret = fc_block_scsi_eh(scpnt);
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 1df5171594b8..11fb68d7e60d 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -2640,9 +2640,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor)
return capacity;
}
+static inline int aac_pci_offline(struct aac_dev *dev)
+{
+ return pci_channel_offline(dev->pdev) || dev->handle_pci_error;
+}
+
static inline int aac_adapter_check_health(struct aac_dev *dev)
{
- if (unlikely(pci_channel_offline(dev->pdev)))
+ if (unlikely(aac_pci_offline(dev)))
return -1;
return (dev)->a_ops.adapter_check_health(dev);
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index e67e032936ef..78430a7b294c 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -672,7 +672,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
return -ETIMEDOUT;
}
- if (unlikely(pci_channel_offline(dev->pdev)))
+ if (unlikely(aac_pci_offline(dev)))
return -EFAULT;
if ((blink = aac_adapter_check_health(dev)) > 0) {
@@ -772,7 +772,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback,
spin_unlock_irqrestore(&fibptr->event_lock, flags);
- if (unlikely(pci_channel_offline(dev->pdev)))
+ if (unlikely(aac_pci_offline(dev)))
return -EFAULT;
fibptr->flags |= FIB_CONTEXT_FLAG_WAIT;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index dbaa4f131433..3ad997ac3510 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -139,6 +139,7 @@ static const struct {
{ IBMVFC_FC_FAILURE, IBMVFC_VENDOR_SPECIFIC, DID_ERROR, 1, 1, "vendor specific" },
{ IBMVFC_FC_SCSI_ERROR, 0, DID_OK, 1, 0, "SCSI error" },
+ { IBMVFC_FC_SCSI_ERROR, IBMVFC_COMMAND_FAILED, DID_ERROR, 0, 1, "PRLI to device failed." },
};
static void ibmvfc_npiv_login(struct ibmvfc_host *);
@@ -1494,9 +1495,9 @@ static void ibmvfc_log_error(struct ibmvfc_event *evt)
if (rsp->flags & FCP_RSP_LEN_VALID)
rsp_code = rsp->data.info.rsp_code;
- scmd_printk(KERN_ERR, cmnd, "Command (%02X) failed: %s (%x:%x) "
+ scmd_printk(KERN_ERR, cmnd, "Command (%02X) : %s (%x:%x) "
"flags: %x fcp_rsp: %x, resid=%d, scsi_status: %x\n",
- cmnd->cmnd[0], err, vfc_cmd->status, vfc_cmd->error,
+ cmnd->cmnd[0], err, be16_to_cpu(vfc_cmd->status), be16_to_cpu(vfc_cmd->error),
rsp->flags, rsp_code, scsi_get_resid(cmnd), rsp->scsi_status);
}
@@ -2022,7 +2023,7 @@ static int ibmvfc_reset_device(struct scsi_device *sdev, int type, char *desc)
sdev_printk(KERN_ERR, sdev, "%s reset failed: %s (%x:%x) "
"flags: %x fcp_rsp: %x, scsi_status: %x\n", desc,
ibmvfc_get_cmd_error(be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error)),
- rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code,
+ be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error), fc_rsp->flags, rsp_code,
fc_rsp->scsi_status);
rsp_rc = -EIO;
} else
@@ -2381,7 +2382,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev)
sdev_printk(KERN_ERR, sdev, "Abort failed: %s (%x:%x) "
"flags: %x fcp_rsp: %x, scsi_status: %x\n",
ibmvfc_get_cmd_error(be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error)),
- rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code,
+ be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error), fc_rsp->flags, rsp_code,
fc_rsp->scsi_status);
rsp_rc = -EIO;
} else
@@ -2755,16 +2756,18 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
if (crq->format == IBMVFC_PARTITION_MIGRATED) {
/* We need to re-setup the interpartition connection */
- dev_info(vhost->dev, "Re-enabling adapter\n");
+ dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n");
vhost->client_migrated = 1;
ibmvfc_purge_requests(vhost, DID_REQUEUE);
ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE);
- } else {
- dev_err(vhost->dev, "Virtual adapter failed (rc=%d)\n", crq->format);
+ } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) {
+ dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format);
ibmvfc_purge_requests(vhost, DID_ERROR);
ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_RESET);
+ } else {
+ dev_err(vhost->dev, "Received unknown transport event from partner (rc=%d)\n", crq->format);
}
return;
case IBMVFC_CRQ_CMD_RSP:
@@ -3348,7 +3351,7 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt)
tgt_log(tgt, level, "Process Login failed: %s (%x:%x) rc=0x%02X\n",
ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)),
- rsp->status, rsp->error, status);
+ be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), status);
break;
}
@@ -3446,9 +3449,10 @@ static void ibmvfc_tgt_plogi_done(struct ibmvfc_event *evt)
ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
tgt_log(tgt, level, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
- ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), rsp->status, rsp->error,
- ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), rsp->fc_type,
- ibmvfc_get_ls_explain(be16_to_cpu(rsp->fc_explain)), rsp->fc_explain, status);
+ ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)),
+ be16_to_cpu(rsp->status), be16_to_cpu(rsp->error),
+ ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), be16_to_cpu(rsp->fc_type),
+ ibmvfc_get_ls_explain(be16_to_cpu(rsp->fc_explain)), be16_to_cpu(rsp->fc_explain), status);
break;
}
@@ -3619,7 +3623,7 @@ static void ibmvfc_tgt_adisc_done(struct ibmvfc_event *evt)
fc_explain = (be32_to_cpu(mad->fc_iu.response[1]) & 0x0000ff00) >> 8;
tgt_info(tgt, "ADISC failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
ibmvfc_get_cmd_error(be16_to_cpu(mad->iu.status), be16_to_cpu(mad->iu.error)),
- mad->iu.status, mad->iu.error,
+ be16_to_cpu(mad->iu.status), be16_to_cpu(mad->iu.error),
ibmvfc_get_fc_type(fc_reason), fc_reason,
ibmvfc_get_ls_explain(fc_explain), fc_explain, status);
break;
@@ -3831,9 +3835,10 @@ static void ibmvfc_tgt_query_target_done(struct ibmvfc_event *evt)
tgt_log(tgt, level, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n",
ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)),
- rsp->status, rsp->error, ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)),
- rsp->fc_type, ibmvfc_get_gs_explain(be16_to_cpu(rsp->fc_explain)),
- rsp->fc_explain, status);
+ be16_to_cpu(rsp->status), be16_to_cpu(rsp->error),
+ ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), be16_to_cpu(rsp->fc_type),
+ ibmvfc_get_gs_explain(be16_to_cpu(rsp->fc_explain)), be16_to_cpu(rsp->fc_explain),
+ status);
break;
}
@@ -3959,7 +3964,7 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt)
level += ibmvfc_retry_host_init(vhost);
ibmvfc_log(vhost, level, "Discover Targets failed: %s (%x:%x)\n",
ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)),
- rsp->status, rsp->error);
+ be16_to_cpu(rsp->status), be16_to_cpu(rsp->error));
break;
case IBMVFC_MAD_DRIVER_FAILED:
break;
@@ -4024,7 +4029,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
ibmvfc_log(vhost, level, "NPIV Login failed: %s (%x:%x)\n",
ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)),
- rsp->status, rsp->error);
+ be16_to_cpu(rsp->status), be16_to_cpu(rsp->error));
ibmvfc_free_event(evt);
return;
case IBMVFC_MAD_CRQ_ERROR:
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index b81a53c4a9a8..459cc288ba1d 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -78,9 +78,14 @@ enum ibmvfc_crq_valid {
IBMVFC_CRQ_XPORT_EVENT = 0xFF,
};
-enum ibmvfc_crq_format {
+enum ibmvfc_crq_init_msg {
IBMVFC_CRQ_INIT = 0x01,
IBMVFC_CRQ_INIT_COMPLETE = 0x02,
+};
+
+enum ibmvfc_crq_xport_evts {
+ IBMVFC_PARTNER_FAILED = 0x01,
+ IBMVFC_PARTNER_DEREGISTER = 0x02,
IBMVFC_PARTITION_MIGRATED = 0x06,
};
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index c98f264f1d83..a497b2c0cb79 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3878,10 +3878,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
* wake up the thread.
*/
spin_lock(&lpfc_cmd->buf_lock);
- if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) {
- lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
- if (lpfc_cmd->waitq)
- wake_up(lpfc_cmd->waitq);
+ lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED;
+ if (lpfc_cmd->waitq) {
+ wake_up(lpfc_cmd->waitq);
lpfc_cmd->waitq = NULL;
}
spin_unlock(&lpfc_cmd->buf_lock);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index e57774472e75..1d8c584ec1e9 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3281,12 +3281,18 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
if (smid < ioc->hi_priority_smid) {
struct scsiio_tracker *st;
+ void *request;
st = _get_st_from_smid(ioc, smid);
if (!st) {
_base_recovery_check(ioc);
return;
}
+
+ /* Clear MPI request frame */
+ request = mpt3sas_base_get_msg_frame(ioc, smid);
+ memset(request, 0, ioc->request_sz);
+
mpt3sas_base_clear_st(ioc, st);
_base_recovery_check(ioc);
return;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 8bb5b8f9f4d2..1ccfbc7eebe0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -1462,11 +1462,23 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
struct scsi_cmnd *scmd = NULL;
struct scsiio_tracker *st;
+ Mpi25SCSIIORequest_t *mpi_request;
if (smid > 0 &&
smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
u32 unique_tag = smid - 1;
+ mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
+
+ /*
+ * If SCSI IO request is outstanding at driver level then
+ * DevHandle filed must be non-zero. If DevHandle is zero
+ * then it means that this smid is free at driver level,
+ * so return NULL.
+ */
+ if (!mpi_request->DevHandle)
+ return scmd;
+
scmd = scsi_host_find_tag(ioc->shost, unique_tag);
if (scmd) {
st = scsi_cmd_priv(scmd);
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index e74a62448ba4..e5db9a9954dc 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1392,10 +1392,8 @@ static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi)
static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi)
{
- struct qedi_nvm_iscsi_image nvm_image;
-
qedi->iscsi_image = dma_alloc_coherent(&qedi->pdev->dev,
- sizeof(nvm_image),
+ sizeof(struct qedi_nvm_iscsi_image),
&qedi->nvm_buf_dma, GFP_KERNEL);
if (!qedi->iscsi_image) {
QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n");
@@ -2236,14 +2234,13 @@ static void qedi_boot_release(void *data)
static int qedi_get_boot_info(struct qedi_ctx *qedi)
{
int ret = 1;
- struct qedi_nvm_iscsi_image nvm_image;
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Get NVM iSCSI CFG image\n");
ret = qedi_ops->common->nvm_get_image(qedi->cdev,
QED_NVM_IMAGE_ISCSI_CFG,
(char *)qedi->iscsi_image,
- sizeof(nvm_image));
+ sizeof(struct qedi_nvm_iscsi_image));
if (ret)
QEDI_ERR(&qedi->dbg_ctx,
"Could not get NVM image. ret = %d\n", ret);
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 16a18d5d856f..6e4f4931ae17 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -3203,6 +3203,8 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session,
if (iscsi_conn_bind(cls_session, cls_conn, is_leading))
return -EINVAL;
ep = iscsi_lookup_endpoint(transport_fd);
+ if (!ep)
+ return -EINVAL;
conn = cls_conn->dd_data;
qla_conn = conn->dd_data;
qla_conn->qla_ep = ep->dd_data;
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index c4cbfd07b916..a08ff3bd6310 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -238,6 +238,7 @@ static struct {
{"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+ {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
{"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
{"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */
diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c
index 5a58cbf3a75d..c14006ac98f9 100644
--- a/drivers/scsi/scsi_dh.c
+++ b/drivers/scsi/scsi_dh.c
@@ -75,6 +75,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = {
{"NETAPP", "INF-01-00", "rdac", },
{"LSI", "INF-01-00", "rdac", },
{"ENGENIO", "INF-01-00", "rdac", },
+ {"LENOVO", "DE_Series", "rdac", },
{NULL, NULL, NULL },
};
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 6a9040faed00..3b119ca0cc0c 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -771,6 +771,12 @@ store_state_field(struct device *dev, struct device_attribute *attr,
mutex_lock(&sdev->state_mutex);
ret = scsi_device_set_state(sdev, state);
+ /*
+ * If the device state changes to SDEV_RUNNING, we need to run
+ * the queue to avoid I/O hang.
+ */
+ if (ret == 0 && state == SDEV_RUNNING)
+ blk_mq_run_hw_queues(sdev->request_queue, true);
mutex_unlock(&sdev->state_mutex);
return ret == 0 ? count : -EINVAL;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 251db30d0882..2b2bc4b49d78 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1415,11 +1415,6 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
}
- /*
- * XXX and what if there are packets in flight and this close()
- * XXX is followed by a "rmmod sd_mod"?
- */
-
scsi_disk_put(sdkp);
}
@@ -3076,6 +3071,9 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp,
unsigned int opt_xfer_bytes =
logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
+ if (sdkp->opt_xfer_blocks == 0)
+ return false;
+
if (sdkp->opt_xfer_blocks > dev_max) {
sd_first_printk(KERN_WARNING, sdkp,
"Optimal transfer size %u logical blocks " \
@@ -3505,9 +3503,21 @@ static void scsi_disk_release(struct device *dev)
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct gendisk *disk = sdkp->disk;
-
+ struct request_queue *q = disk->queue;
+
ida_free(&sd_index_ida, sdkp->index);
+ /*
+ * Wait until all requests that are in progress have completed.
+ * This is necessary to avoid that e.g. scsi_end_request() crashes
+ * due to clearing the disk->private_data pointer. Wait from inside
+ * scsi_disk_release() instead of from sd_release() to avoid that
+ * freezing and unfreezing the request queue affects user space I/O
+ * in case multiple processes open a /dev/sd... node concurrently.
+ */
+ blk_mq_freeze_queue(q);
+ blk_mq_unfreeze_queue(q);
+
disk->private_data = NULL;
put_disk(disk);
put_device(&sdkp->device->sdev_gendev);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 84380bae20f1..8472de1007ff 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -385,7 +385,7 @@ enum storvsc_request_type {
* This is the end of Protocol specific defines.
*/
-static int storvsc_ringbuffer_size = (256 * PAGE_SIZE);
+static int storvsc_ringbuffer_size = (128 * 1024);
static u32 max_outstanding_req_per_channel;
static int storvsc_vcpus_per_sub_channel = 4;
@@ -668,13 +668,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns)
{
struct device *dev = &device->device;
struct storvsc_device *stor_device;
- int num_cpus = num_online_cpus();
int num_sc;
struct storvsc_cmd_request *request;
struct vstor_packet *vstor_packet;
int ret, t;
- num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns);
+ /*
+ * If the number of CPUs is artificially restricted, such as
+ * with maxcpus=1 on the kernel boot line, Hyper-V could offer
+ * sub-channels >= the number of CPUs. These sub-channels
+ * should not be created. The primary channel is already created
+ * and assigned to one CPU, so check against # CPUs - 1.
+ */
+ num_sc = min((int)(num_online_cpus() - 1), max_chns);
+ if (!num_sc)
+ return;
+
stor_device = get_out_stor_device(device);
if (!stor_device)
return;
diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c
index 9351349cf0a9..1e0041ec8132 100644
--- a/drivers/soc/bcm/bcm2835-power.c
+++ b/drivers/soc/bcm/bcm2835-power.c
@@ -150,7 +150,12 @@ struct bcm2835_power {
static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg)
{
- u64 start = ktime_get_ns();
+ u64 start;
+
+ if (!reg)
+ return 0;
+
+ start = ktime_get_ns();
/* Enable the module's async AXI bridges. */
ASB_WRITE(reg, ASB_READ(reg) & ~ASB_REQ_STOP);
@@ -165,7 +170,12 @@ static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg)
static int bcm2835_asb_disable(struct bcm2835_power *power, u32 reg)
{
- u64 start = ktime_get_ns();
+ u64 start;
+
+ if (!reg)
+ return 0;
+
+ start = ktime_get_ns();
/* Enable the module's async AXI bridges. */
ASB_WRITE(reg, ASB_READ(reg) | ASB_REQ_STOP);
@@ -475,7 +485,7 @@ static int bcm2835_power_pd_power_off(struct generic_pm_domain *domain)
}
}
-static void
+static int
bcm2835_init_power_domain(struct bcm2835_power *power,
int pd_xlate_index, const char *name)
{
@@ -483,6 +493,17 @@ bcm2835_init_power_domain(struct bcm2835_power *power,
struct bcm2835_power_domain *dom = &power->domains[pd_xlate_index];
dom->clk = devm_clk_get(dev->parent, name);
+ if (IS_ERR(dom->clk)) {
+ int ret = PTR_ERR(dom->clk);
+
+ if (ret == -EPROBE_DEFER)
+ return ret;
+
+ /* Some domains don't have a clk, so make sure that we
+ * don't deref an error pointer later.
+ */
+ dom->clk = NULL;
+ }
dom->base.name = name;
dom->base.power_on = bcm2835_power_pd_power_on;
@@ -495,6 +516,8 @@ bcm2835_init_power_domain(struct bcm2835_power *power,
pm_genpd_init(&dom->base, NULL, true);
power->pd_xlate.domains[pd_xlate_index] = &dom->base;
+
+ return 0;
}
/** bcm2835_reset_reset - Resets a block that has a reset line in the
@@ -592,7 +615,7 @@ static int bcm2835_power_probe(struct platform_device *pdev)
{ BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM0 },
{ BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM1 },
};
- int ret, i;
+ int ret = 0, i;
u32 id;
power = devm_kzalloc(dev, sizeof(*power), GFP_KERNEL);
@@ -619,8 +642,11 @@ static int bcm2835_power_probe(struct platform_device *pdev)
power->pd_xlate.num_domains = ARRAY_SIZE(power_domain_names);
- for (i = 0; i < ARRAY_SIZE(power_domain_names); i++)
- bcm2835_init_power_domain(power, i, power_domain_names[i]);
+ for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) {
+ ret = bcm2835_init_power_domain(power, i, power_domain_names[i]);
+ if (ret)
+ goto fail;
+ }
for (i = 0; i < ARRAY_SIZE(domain_deps); i++) {
pm_genpd_add_subdomain(&power->domains[domain_deps[i].parent].base,
@@ -634,12 +660,21 @@ static int bcm2835_power_probe(struct platform_device *pdev)
ret = devm_reset_controller_register(dev, &power->reset);
if (ret)
- return ret;
+ goto fail;
of_genpd_add_provider_onecell(dev->parent->of_node, &power->pd_xlate);
dev_info(dev, "Broadcom BCM2835 power domains driver");
return 0;
+
+fail:
+ for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) {
+ struct generic_pm_domain *dom = &power->domains[i].base;
+
+ if (dom->name)
+ pm_genpd_remove(dom);
+ }
+ return ret;
}
static int bcm2835_power_remove(struct platform_device *pdev)
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index c0901b96cfe4..62951e836cbc 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -114,8 +114,6 @@ source "drivers/staging/ralink-gdma/Kconfig"
source "drivers/staging/mt7621-mmc/Kconfig"
-source "drivers/staging/mt7621-eth/Kconfig"
-
source "drivers/staging/mt7621-dts/Kconfig"
source "drivers/staging/gasket/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 57c6bce13ff4..d1b17ddcd354 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -47,7 +47,6 @@ obj-$(CONFIG_SPI_MT7621) += mt7621-spi/
obj-$(CONFIG_SOC_MT7621) += mt7621-dma/
obj-$(CONFIG_DMA_RALINK) += ralink-gdma/
obj-$(CONFIG_MTK_MMC) += mt7621-mmc/
-obj-$(CONFIG_NET_MEDIATEK_SOC_STAGING) += mt7621-eth/
obj-$(CONFIG_SOC_MT7621) += mt7621-dts/
obj-$(CONFIG_STAGING_GASKET_FRAMEWORK) += gasket/
obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/
diff --git a/drivers/staging/axis-fifo/Kconfig b/drivers/staging/axis-fifo/Kconfig
index 687537203d9c..d9725888af6f 100644
--- a/drivers/staging/axis-fifo/Kconfig
+++ b/drivers/staging/axis-fifo/Kconfig
@@ -3,6 +3,7 @@
#
config XIL_AXIS_FIFO
tristate "Xilinx AXI-Stream FIFO IP core driver"
+ depends on OF
default n
help
This adds support for the Xilinx AXI-Stream
diff --git a/drivers/staging/comedi/comedidev.h b/drivers/staging/comedi/comedidev.h
index a7d569cfca5d..0dff1ac057cd 100644
--- a/drivers/staging/comedi/comedidev.h
+++ b/drivers/staging/comedi/comedidev.h
@@ -1001,6 +1001,8 @@ int comedi_dio_insn_config(struct comedi_device *dev,
unsigned int mask);
unsigned int comedi_dio_update_state(struct comedi_subdevice *s,
unsigned int *data);
+unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s,
+ struct comedi_cmd *cmd);
unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s);
unsigned int comedi_nscans_left(struct comedi_subdevice *s,
unsigned int nscans);
diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c
index eefa62f42c0f..5a32b8fc000e 100644
--- a/drivers/staging/comedi/drivers.c
+++ b/drivers/staging/comedi/drivers.c
@@ -394,11 +394,13 @@ unsigned int comedi_dio_update_state(struct comedi_subdevice *s,
EXPORT_SYMBOL_GPL(comedi_dio_update_state);
/**
- * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes
+ * comedi_bytes_per_scan_cmd() - Get length of asynchronous command "scan" in
+ * bytes
* @s: COMEDI subdevice.
+ * @cmd: COMEDI command.
*
* Determines the overall scan length according to the subdevice type and the
- * number of channels in the scan.
+ * number of channels in the scan for the specified command.
*
* For digital input, output or input/output subdevices, samples for
* multiple channels are assumed to be packed into one or more unsigned
@@ -408,9 +410,9 @@ EXPORT_SYMBOL_GPL(comedi_dio_update_state);
*
* Returns the overall scan length in bytes.
*/
-unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s)
+unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s,
+ struct comedi_cmd *cmd)
{
- struct comedi_cmd *cmd = &s->async->cmd;
unsigned int num_samples;
unsigned int bits_per_sample;
@@ -427,6 +429,29 @@ unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s)
}
return comedi_samples_to_bytes(s, num_samples);
}
+EXPORT_SYMBOL_GPL(comedi_bytes_per_scan_cmd);
+
+/**
+ * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes
+ * @s: COMEDI subdevice.
+ *
+ * Determines the overall scan length according to the subdevice type and the
+ * number of channels in the scan for the current command.
+ *
+ * For digital input, output or input/output subdevices, samples for
+ * multiple channels are assumed to be packed into one or more unsigned
+ * short or unsigned int values according to the subdevice's %SDF_LSAMPL
+ * flag. For other types of subdevice, samples are assumed to occupy a
+ * whole unsigned short or unsigned int according to the %SDF_LSAMPL flag.
+ *
+ * Returns the overall scan length in bytes.
+ */
+unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s)
+{
+ struct comedi_cmd *cmd = &s->async->cmd;
+
+ return comedi_bytes_per_scan_cmd(s, cmd);
+}
EXPORT_SYMBOL_GPL(comedi_bytes_per_scan);
static unsigned int __comedi_nscans_left(struct comedi_subdevice *s,
diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index 5edf59ac6706..b04dad8c7092 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -3545,6 +3545,7 @@ static int ni_cdio_cmdtest(struct comedi_device *dev,
struct comedi_subdevice *s, struct comedi_cmd *cmd)
{
struct ni_private *devpriv = dev->private;
+ unsigned int bytes_per_scan;
int err = 0;
/* Step 1 : check if triggers are trivially valid */
@@ -3579,9 +3580,12 @@ static int ni_cdio_cmdtest(struct comedi_device *dev,
err |= comedi_check_trigger_arg_is(&cmd->convert_arg, 0);
err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg,
cmd->chanlist_len);
- err |= comedi_check_trigger_arg_max(&cmd->stop_arg,
- s->async->prealloc_bufsz /
- comedi_bytes_per_scan(s));
+ bytes_per_scan = comedi_bytes_per_scan_cmd(s, cmd);
+ if (bytes_per_scan) {
+ err |= comedi_check_trigger_arg_max(&cmd->stop_arg,
+ s->async->prealloc_bufsz /
+ bytes_per_scan);
+ }
if (err)
return 3;
diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c
index 829f7b12e0dc..9bbc68729c11 100644
--- a/drivers/staging/erofs/dir.c
+++ b/drivers/staging/erofs/dir.c
@@ -23,6 +23,21 @@ static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = {
[EROFS_FT_SYMLINK] = DT_LNK,
};
+static void debug_one_dentry(unsigned char d_type, const char *de_name,
+ unsigned int de_namelen)
+{
+#ifdef CONFIG_EROFS_FS_DEBUG
+ /* since the on-disk name could not have the trailing '\0' */
+ unsigned char dbg_namebuf[EROFS_NAME_LEN + 1];
+
+ memcpy(dbg_namebuf, de_name, de_namelen);
+ dbg_namebuf[de_namelen] = '\0';
+
+ debugln("found dirent %s de_len %u d_type %d", dbg_namebuf,
+ de_namelen, d_type);
+#endif
+}
+
static int erofs_fill_dentries(struct dir_context *ctx,
void *dentry_blk, unsigned int *ofs,
unsigned int nameoff, unsigned int maxsize)
@@ -33,14 +48,10 @@ static int erofs_fill_dentries(struct dir_context *ctx,
de = dentry_blk + *ofs;
while (de < end) {
const char *de_name;
- int de_namelen;
+ unsigned int de_namelen;
unsigned char d_type;
-#ifdef CONFIG_EROFS_FS_DEBUG
- unsigned int dbg_namelen;
- unsigned char dbg_namebuf[EROFS_NAME_LEN];
-#endif
- if (unlikely(de->file_type < EROFS_FT_MAX))
+ if (de->file_type < EROFS_FT_MAX)
d_type = erofs_filetype_table[de->file_type];
else
d_type = DT_UNKNOWN;
@@ -48,26 +59,20 @@ static int erofs_fill_dentries(struct dir_context *ctx,
nameoff = le16_to_cpu(de->nameoff);
de_name = (char *)dentry_blk + nameoff;
- de_namelen = unlikely(de + 1 >= end) ?
- /* last directory entry */
- strnlen(de_name, maxsize - nameoff) :
- le16_to_cpu(de[1].nameoff) - nameoff;
+ /* the last dirent in the block? */
+ if (de + 1 >= end)
+ de_namelen = strnlen(de_name, maxsize - nameoff);
+ else
+ de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
/* a corrupted entry is found */
- if (unlikely(de_namelen < 0)) {
+ if (unlikely(nameoff + de_namelen > maxsize ||
+ de_namelen > EROFS_NAME_LEN)) {
DBG_BUGON(1);
return -EIO;
}
-#ifdef CONFIG_EROFS_FS_DEBUG
- dbg_namelen = min(EROFS_NAME_LEN - 1, de_namelen);
- memcpy(dbg_namebuf, de_name, dbg_namelen);
- dbg_namebuf[dbg_namelen] = '\0';
-
- debugln("%s, found de_name %s de_len %d d_type %d", __func__,
- dbg_namebuf, de_namelen, d_type);
-#endif
-
+ debug_one_dentry(d_type, de_name, de_namelen);
if (!dir_emit(ctx, de_name, de_namelen,
le64_to_cpu(de->nid), d_type))
/* stopped by some reason */
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 8715bc50e09c..31eef8395774 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -972,6 +972,7 @@ repeat:
overlapped = false;
compressed_pages = grp->compressed_pages;
+ err = 0;
for (i = 0; i < clusterpages; ++i) {
unsigned int pagenr;
@@ -981,26 +982,39 @@ repeat:
DBG_BUGON(!page);
DBG_BUGON(!page->mapping);
- if (z_erofs_is_stagingpage(page))
- continue;
+ if (!z_erofs_is_stagingpage(page)) {
#ifdef EROFS_FS_HAS_MANAGED_CACHE
- if (page->mapping == MNGD_MAPPING(sbi)) {
- DBG_BUGON(!PageUptodate(page));
- continue;
- }
+ if (page->mapping == MNGD_MAPPING(sbi)) {
+ if (unlikely(!PageUptodate(page)))
+ err = -EIO;
+ continue;
+ }
#endif
- /* only non-head page could be reused as a compressed page */
- pagenr = z_erofs_onlinepage_index(page);
+ /*
+ * only if non-head page can be selected
+ * for inplace decompression
+ */
+ pagenr = z_erofs_onlinepage_index(page);
- DBG_BUGON(pagenr >= nr_pages);
- DBG_BUGON(pages[pagenr]);
- ++sparsemem_pages;
- pages[pagenr] = page;
+ DBG_BUGON(pagenr >= nr_pages);
+ DBG_BUGON(pages[pagenr]);
+ ++sparsemem_pages;
+ pages[pagenr] = page;
- overlapped = true;
+ overlapped = true;
+ }
+
+ /* PG_error needs checking for inplaced and staging pages */
+ if (unlikely(PageError(page))) {
+ DBG_BUGON(PageUptodate(page));
+ err = -EIO;
+ }
}
+ if (unlikely(err))
+ goto out;
+
llen = (nr_pages << PAGE_SHIFT) - work->pageofs;
if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) {
@@ -1029,6 +1043,10 @@ repeat:
skip_allocpage:
vout = erofs_vmap(pages, nr_pages);
+ if (!vout) {
+ err = -ENOMEM;
+ goto out;
+ }
err = z_erofs_vle_unzip_vmap(compressed_pages,
clusterpages, vout, llen, work->pageofs, overlapped);
@@ -1194,6 +1212,7 @@ repeat:
if (page->mapping == mc) {
WRITE_ONCE(grp->compressed_pages[nr], page);
+ ClearPageError(page);
if (!PagePrivate(page)) {
/*
* impossible to be !PagePrivate(page) for
diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c
index 48b263a2731a..0daac9b984a8 100644
--- a/drivers/staging/erofs/unzip_vle_lz4.c
+++ b/drivers/staging/erofs/unzip_vle_lz4.c
@@ -136,10 +136,13 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE);
- if (clusterpages == 1)
+ if (clusterpages == 1) {
vin = kmap_atomic(compressed_pages[0]);
- else
+ } else {
vin = erofs_vmap(compressed_pages, clusterpages);
+ if (!vin)
+ return -ENOMEM;
+ }
preempt_disable();
vout = erofs_pcpubuf[smp_processor_id()].data;
diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts
index b73385540216..250c15ace2a7 100644
--- a/drivers/staging/mt7621-dts/gbpc1.dts
+++ b/drivers/staging/mt7621-dts/gbpc1.dts
@@ -117,22 +117,6 @@
status = "okay";
};
-&ethernet {
- //mtd-mac-address = <&factory 0xe000>;
- gmac1: mac@0 {
- compatible = "mediatek,eth-mac";
- reg = <0>;
- phy-handle = <&phy1>;
- };
-
- mdio-bus {
- phy1: ethernet-phy@1 {
- reg = <1>;
- phy-mode = "rgmii";
- };
- };
-};
-
&pinctrl {
state_default: pinctrl0 {
gpio {
@@ -141,3 +125,16 @@
};
};
};
+
+&switch0 {
+ ports {
+ port@0 {
+ label = "ethblack";
+ status = "ok";
+ };
+ port@4 {
+ label = "ethblue";
+ status = "ok";
+ };
+ };
+};
diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi
index 6aff3680ce4b..17020e24abd2 100644
--- a/drivers/staging/mt7621-dts/mt7621.dtsi
+++ b/drivers/staging/mt7621-dts/mt7621.dtsi
@@ -372,16 +372,83 @@
mediatek,ethsys = <&ethsys>;
- mediatek,switch = <&gsw>;
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "rgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ status = "off";
+ phy-mode = "rgmii";
+ phy-handle = <&phy5>;
+ };
mdio-bus {
#address-cells = <1>;
#size-cells = <0>;
- phy1f: ethernet-phy@1f {
- reg = <0x1f>;
+ phy5: ethernet-phy@5 {
+ reg = <5>;
phy-mode = "rgmii";
};
+
+ switch0: switch0@0 {
+ compatible = "mediatek,mt7621";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ mediatek,mcm;
+ resets = <&rstctrl 2>;
+ reset-names = "mcm";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ port@0 {
+ status = "off";
+ reg = <0>;
+ label = "lan0";
+ };
+ port@1 {
+ status = "off";
+ reg = <1>;
+ label = "lan1";
+ };
+ port@2 {
+ status = "off";
+ reg = <2>;
+ label = "lan2";
+ };
+ port@3 {
+ status = "off";
+ reg = <3>;
+ label = "lan3";
+ };
+ port@4 {
+ status = "off";
+ reg = <4>;
+ label = "lan4";
+ };
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "trgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
};
};
diff --git a/drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt b/drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt
deleted file mode 100644
index 596b38552697..000000000000
--- a/drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Mediatek Gigabit Switch
-=======================
-
-The mediatek gigabit switch can be found on Mediatek SoCs.
-
-Required properties:
-- compatible: Should be "mediatek,mt7620-gsw", "mediatek,mt7621-gsw",
- "mediatek,mt7623-gsw"
-- reg: Address and length of the register set for the device
-- interrupts: Should contain the gigabit switches interrupt
-
-
-Additional required properties for ARM based SoCs:
-- mediatek,reset-pin: phandle describing the reset GPIO
-- clocks: the clocks used by the switch
-- clock-names: the names of the clocks listed in the clocks property
- these should be "trgpll", "esw", "gp2", "gp1"
-- mt7530-supply: the phandle of the regulator used to power the switch
-- mediatek,pctl-regmap: phandle to the port control regmap. this is used to
- setup the drive current
-
-
-Optional properties:
-- interrupt-parent: Should be the phandle for the interrupt controller
- that services interrupts for this device
-
-Example:
-
-gsw: switch@1b100000 {
- compatible = "mediatek,mt7623-gsw";
- reg = <0 0x1b110000 0 0x300000>;
-
- interrupt-parent = <&pio>;
- interrupts = <168 IRQ_TYPE_EDGE_RISING>;
-
- clocks = <&apmixedsys CLK_APMIXED_TRGPLL>,
- <&ethsys CLK_ETHSYS_ESW>,
- <&ethsys CLK_ETHSYS_GP2>,
- <&ethsys CLK_ETHSYS_GP1>;
- clock-names = "trgpll", "esw", "gp2", "gp1";
-
- mt7530-supply = <&mt6323_vpa_reg>;
-
- mediatek,pctl-regmap = <&syscfg_pctl_a>;
- mediatek,reset-pin = <&pio 15 0>;
-
- status = "okay";
-};
diff --git a/drivers/staging/mt7621-eth/Kconfig b/drivers/staging/mt7621-eth/Kconfig
deleted file mode 100644
index 44ea86c7a96c..000000000000
--- a/drivers/staging/mt7621-eth/Kconfig
+++ /dev/null
@@ -1,39 +0,0 @@
-config NET_VENDOR_MEDIATEK_STAGING
- bool "MediaTek ethernet driver - staging version"
- depends on RALINK
- ---help---
- If you have an MT7621 Mediatek SoC with ethernet, say Y.
-
-if NET_VENDOR_MEDIATEK_STAGING
-choice
- prompt "MAC type"
-
-config NET_MEDIATEK_MT7621
- bool "MT7621"
- depends on MIPS && SOC_MT7621
-
-endchoice
-
-config NET_MEDIATEK_SOC_STAGING
- tristate "MediaTek SoC Gigabit Ethernet support"
- depends on NET_VENDOR_MEDIATEK_STAGING
- select PHYLIB
- ---help---
- This driver supports the gigabit ethernet MACs in the
- MediaTek SoC family.
-
-config NET_MEDIATEK_MDIO
- def_bool NET_MEDIATEK_SOC_STAGING
- depends on NET_MEDIATEK_MT7621
- select PHYLIB
-
-config NET_MEDIATEK_MDIO_MT7620
- def_bool NET_MEDIATEK_SOC_STAGING
- depends on NET_MEDIATEK_MT7621
- select NET_MEDIATEK_MDIO
-
-config NET_MEDIATEK_GSW_MT7621
- def_tristate NET_MEDIATEK_SOC_STAGING
- depends on NET_MEDIATEK_MT7621
-
-endif #NET_VENDOR_MEDIATEK_STAGING
diff --git a/drivers/staging/mt7621-eth/Makefile b/drivers/staging/mt7621-eth/Makefile
deleted file mode 100644
index 018bcc3596b3..000000000000
--- a/drivers/staging/mt7621-eth/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-#
-# Makefile for the Ralink SoCs built-in ethernet macs
-#
-
-mtk-eth-soc-y += mtk_eth_soc.o ethtool.o
-
-mtk-eth-soc-$(CONFIG_NET_MEDIATEK_MDIO) += mdio.o
-mtk-eth-soc-$(CONFIG_NET_MEDIATEK_MDIO_MT7620) += mdio_mt7620.o
-
-mtk-eth-soc-$(CONFIG_NET_MEDIATEK_MT7621) += soc_mt7621.o
-
-obj-$(CONFIG_NET_MEDIATEK_GSW_MT7621) += gsw_mt7621.o
-
-obj-$(CONFIG_NET_MEDIATEK_SOC_STAGING) += mtk-eth-soc.o
diff --git a/drivers/staging/mt7621-eth/TODO b/drivers/staging/mt7621-eth/TODO
deleted file mode 100644
index f9e47d4b4cd4..000000000000
--- a/drivers/staging/mt7621-eth/TODO
+++ /dev/null
@@ -1,13 +0,0 @@
-
-- verify devicetree documentation is consistent with code
-- fix ethtool - currently doesn't return valid data.
-- general code review and clean up
-- add support for second MAC on mt7621
-- convert gsw code to use switchdev interfaces
-- md7620_mmi_write etc should probably be wrapped
- in a regmap abstraction.
-- Get soc_mt7621 to work with QDMA TX if possible.
-- Ensure phys are correctly configured when a cable
- is plugged in.
-
-Cc: NeilBrown <neil@brown.name>
diff --git a/drivers/staging/mt7621-eth/ethtool.c b/drivers/staging/mt7621-eth/ethtool.c
deleted file mode 100644
index 8c4228e2c987..000000000000
--- a/drivers/staging/mt7621-eth/ethtool.c
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#include "mtk_eth_soc.h"
-#include "ethtool.h"
-
-struct mtk_stat {
- char name[ETH_GSTRING_LEN];
- unsigned int idx;
-};
-
-#define MTK_HW_STAT(stat) { \
- .name = #stat, \
- .idx = offsetof(struct mtk_hw_stats, stat) / sizeof(u64) \
-}
-
-static const struct mtk_stat mtk_ethtool_hw_stats[] = {
- MTK_HW_STAT(tx_bytes),
- MTK_HW_STAT(tx_packets),
- MTK_HW_STAT(tx_skip),
- MTK_HW_STAT(tx_collisions),
- MTK_HW_STAT(rx_bytes),
- MTK_HW_STAT(rx_packets),
- MTK_HW_STAT(rx_overflow),
- MTK_HW_STAT(rx_fcs_errors),
- MTK_HW_STAT(rx_short_errors),
- MTK_HW_STAT(rx_long_errors),
- MTK_HW_STAT(rx_checksum_errors),
- MTK_HW_STAT(rx_flow_control_packets),
-};
-
-#define MTK_HW_STATS_LEN ARRAY_SIZE(mtk_ethtool_hw_stats)
-
-static int mtk_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- int err;
-
- if (!mac->phy_dev)
- return -ENODEV;
-
- if (mac->phy_flags == MTK_PHY_FLAG_ATTACH) {
- err = phy_read_status(mac->phy_dev);
- if (err)
- return -ENODEV;
- }
-
- phy_ethtool_ksettings_get(mac->phy_dev, cmd);
- return 0;
-}
-
-static int mtk_set_link_ksettings(struct net_device *dev,
- const struct ethtool_link_ksettings *cmd)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- if (!mac->phy_dev)
- return -ENODEV;
-
- if (cmd->base.phy_address != mac->phy_dev->mdio.addr) {
- if (mac->hw->phy->phy_node[cmd->base.phy_address]) {
- mac->phy_dev = mac->hw->phy->phy[cmd->base.phy_address];
- mac->phy_flags = MTK_PHY_FLAG_PORT;
- } else if (mac->hw->mii_bus) {
- mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus,
- cmd->base.phy_address);
- if (!mac->phy_dev)
- return -ENODEV;
- mac->phy_flags = MTK_PHY_FLAG_ATTACH;
- } else {
- return -ENODEV;
- }
- }
-
- return phy_ethtool_ksettings_set(mac->phy_dev, cmd);
-}
-
-static void mtk_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_soc_data *soc = mac->hw->soc;
-
- strlcpy(info->driver, mac->hw->dev->driver->name, sizeof(info->driver));
- strlcpy(info->bus_info, dev_name(mac->hw->dev), sizeof(info->bus_info));
-
- if (soc->reg_table[MTK_REG_MTK_COUNTER_BASE])
- info->n_stats = MTK_HW_STATS_LEN;
-}
-
-static u32 mtk_get_msglevel(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- return mac->hw->msg_enable;
-}
-
-static void mtk_set_msglevel(struct net_device *dev, u32 value)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- mac->hw->msg_enable = value;
-}
-
-static int mtk_nway_reset(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- if (!mac->phy_dev)
- return -EOPNOTSUPP;
-
- return genphy_restart_aneg(mac->phy_dev);
-}
-
-static u32 mtk_get_link(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- int err;
-
- if (!mac->phy_dev)
- goto out_get_link;
-
- if (mac->phy_flags == MTK_PHY_FLAG_ATTACH) {
- err = genphy_update_link(mac->phy_dev);
- if (err)
- goto out_get_link;
- }
-
- return mac->phy_dev->link;
-
-out_get_link:
- return ethtool_op_get_link(dev);
-}
-
-static int mtk_set_ringparam(struct net_device *dev,
- struct ethtool_ringparam *ring)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- if ((ring->tx_pending < 2) ||
- (ring->rx_pending < 2) ||
- (ring->rx_pending > mac->hw->soc->dma_ring_size) ||
- (ring->tx_pending > mac->hw->soc->dma_ring_size))
- return -EINVAL;
-
- dev->netdev_ops->ndo_stop(dev);
-
- mac->hw->tx_ring.tx_ring_size = BIT(fls(ring->tx_pending) - 1);
- mac->hw->rx_ring[0].rx_ring_size = BIT(fls(ring->rx_pending) - 1);
-
- return dev->netdev_ops->ndo_open(dev);
-}
-
-static void mtk_get_ringparam(struct net_device *dev,
- struct ethtool_ringparam *ring)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- ring->rx_max_pending = mac->hw->soc->dma_ring_size;
- ring->tx_max_pending = mac->hw->soc->dma_ring_size;
- ring->rx_pending = mac->hw->rx_ring[0].rx_ring_size;
- ring->tx_pending = mac->hw->tx_ring.tx_ring_size;
-}
-
-static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
- int i;
-
- switch (stringset) {
- case ETH_SS_STATS:
- for (i = 0; i < MTK_HW_STATS_LEN; i++) {
- memcpy(data, mtk_ethtool_hw_stats[i].name,
- ETH_GSTRING_LEN);
- data += ETH_GSTRING_LEN;
- }
- break;
- }
-}
-
-static int mtk_get_sset_count(struct net_device *dev, int sset)
-{
- switch (sset) {
- case ETH_SS_STATS:
- return MTK_HW_STATS_LEN;
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static void mtk_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_hw_stats *hwstats = mac->hw_stats;
- unsigned int start;
- int i;
-
- if (netif_running(dev) && netif_device_present(dev)) {
- if (spin_trylock(&hwstats->stats_lock)) {
- mtk_stats_update_mac(mac);
- spin_unlock(&hwstats->stats_lock);
- }
- }
-
- do {
- start = u64_stats_fetch_begin_irq(&hwstats->syncp);
- for (i = 0; i < MTK_HW_STATS_LEN; i++)
- data[i] = ((u64 *)hwstats)[mtk_ethtool_hw_stats[i].idx];
-
- } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
-}
-
-static struct ethtool_ops mtk_ethtool_ops = {
- .get_link_ksettings = mtk_get_link_ksettings,
- .set_link_ksettings = mtk_set_link_ksettings,
- .get_drvinfo = mtk_get_drvinfo,
- .get_msglevel = mtk_get_msglevel,
- .set_msglevel = mtk_set_msglevel,
- .nway_reset = mtk_nway_reset,
- .get_link = mtk_get_link,
- .set_ringparam = mtk_set_ringparam,
- .get_ringparam = mtk_get_ringparam,
-};
-
-void mtk_set_ethtool_ops(struct net_device *netdev)
-{
- struct mtk_mac *mac = netdev_priv(netdev);
- struct mtk_soc_data *soc = mac->hw->soc;
-
- if (soc->reg_table[MTK_REG_MTK_COUNTER_BASE]) {
- mtk_ethtool_ops.get_strings = mtk_get_strings;
- mtk_ethtool_ops.get_sset_count = mtk_get_sset_count;
- mtk_ethtool_ops.get_ethtool_stats = mtk_get_ethtool_stats;
- }
-
- netdev->ethtool_ops = &mtk_ethtool_ops;
-}
diff --git a/drivers/staging/mt7621-eth/ethtool.h b/drivers/staging/mt7621-eth/ethtool.h
deleted file mode 100644
index 0071469aea6c..000000000000
--- a/drivers/staging/mt7621-eth/ethtool.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#ifndef MTK_ETHTOOL_H
-#define MTK_ETHTOOL_H
-
-#include <linux/ethtool.h>
-
-void mtk_set_ethtool_ops(struct net_device *netdev);
-
-#endif /* MTK_ETHTOOL_H */
diff --git a/drivers/staging/mt7621-eth/gsw_mt7620.h b/drivers/staging/mt7621-eth/gsw_mt7620.h
deleted file mode 100644
index 70f7e5481952..000000000000
--- a/drivers/staging/mt7621-eth/gsw_mt7620.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#ifndef _RALINK_GSW_MT7620_H__
-#define _RALINK_GSW_MT7620_H__
-
-#define GSW_REG_PHY_TIMEOUT (5 * HZ)
-
-#define MT7620_GSW_REG_PIAC 0x0004
-
-#define GSW_NUM_VLANS 16
-#define GSW_NUM_VIDS 4096
-#define GSW_NUM_PORTS 7
-#define GSW_PORT6 6
-
-#define GSW_MDIO_ACCESS BIT(31)
-#define GSW_MDIO_READ BIT(19)
-#define GSW_MDIO_WRITE BIT(18)
-#define GSW_MDIO_START BIT(16)
-#define GSW_MDIO_ADDR_SHIFT 20
-#define GSW_MDIO_REG_SHIFT 25
-
-#define GSW_REG_PORT_PMCR(x) (0x3000 + (x * 0x100))
-#define GSW_REG_PORT_STATUS(x) (0x3008 + (x * 0x100))
-#define GSW_REG_SMACCR0 0x3fE4
-#define GSW_REG_SMACCR1 0x3fE8
-#define GSW_REG_CKGCR 0x3ff0
-
-#define GSW_REG_IMR 0x7008
-#define GSW_REG_ISR 0x700c
-#define GSW_REG_GPC1 0x7014
-
-#define SYSC_REG_CHIP_REV_ID 0x0c
-#define SYSC_REG_CFG 0x10
-#define SYSC_REG_CFG1 0x14
-#define RST_CTRL_MCM BIT(2)
-#define SYSC_PAD_RGMII2_MDIO 0x58
-#define SYSC_GPIO_MODE 0x60
-
-#define PORT_IRQ_ST_CHG 0x7f
-
-#define MT7621_ESW_PHY_POLLING 0x0000
-#define MT7620_ESW_PHY_POLLING 0x7000
-
-#define PMCR_IPG BIT(18)
-#define PMCR_MAC_MODE BIT(16)
-#define PMCR_FORCE BIT(15)
-#define PMCR_TX_EN BIT(14)
-#define PMCR_RX_EN BIT(13)
-#define PMCR_BACKOFF BIT(9)
-#define PMCR_BACKPRES BIT(8)
-#define PMCR_RX_FC BIT(5)
-#define PMCR_TX_FC BIT(4)
-#define PMCR_SPEED(_x) (_x << 2)
-#define PMCR_DUPLEX BIT(1)
-#define PMCR_LINK BIT(0)
-
-#define PHY_AN_EN BIT(31)
-#define PHY_PRE_EN BIT(30)
-#define PMY_MDC_CONF(_x) ((_x & 0x3f) << 24)
-
-/* ethernet subsystem config register */
-#define ETHSYS_SYSCFG0 0x14
-/* ethernet subsystem clock register */
-#define ETHSYS_CLKCFG0 0x2c
-#define ETHSYS_TRGMII_CLK_SEL362_5 BIT(11)
-
-/* p5 RGMII wrapper TX clock control register */
-#define MT7530_P5RGMIITXCR 0x7b04
-/* p5 RGMII wrapper RX clock control register */
-#define MT7530_P5RGMIIRXCR 0x7b00
-/* TRGMII TDX ODT registers */
-#define MT7530_TRGMII_TD0_ODT 0x7a54
-#define MT7530_TRGMII_TD1_ODT 0x7a5c
-#define MT7530_TRGMII_TD2_ODT 0x7a64
-#define MT7530_TRGMII_TD3_ODT 0x7a6c
-#define MT7530_TRGMII_TD4_ODT 0x7a74
-#define MT7530_TRGMII_TD5_ODT 0x7a7c
-/* TRGMII TCK ctrl register */
-#define MT7530_TRGMII_TCK_CTRL 0x7a78
-/* TRGMII Tx ctrl register */
-#define MT7530_TRGMII_TXCTRL 0x7a40
-/* port 6 extended control register */
-#define MT7530_P6ECR 0x7830
-/* IO driver control register */
-#define MT7530_IO_DRV_CR 0x7810
-/* top signal control register */
-#define MT7530_TOP_SIG_CTRL 0x7808
-/* modified hwtrap register */
-#define MT7530_MHWTRAP 0x7804
-/* hwtrap status register */
-#define MT7530_HWTRAP 0x7800
-/* status interrupt register */
-#define MT7530_SYS_INT_STS 0x700c
-/* system nterrupt register */
-#define MT7530_SYS_INT_EN 0x7008
-/* system control register */
-#define MT7530_SYS_CTRL 0x7000
-/* port MAC status register */
-#define MT7530_PMSR_P(x) (0x3008 + (x * 0x100))
-/* port MAC control register */
-#define MT7530_PMCR_P(x) (0x3000 + (x * 0x100))
-
-#define MT7621_XTAL_SHIFT 6
-#define MT7621_XTAL_MASK 0x7
-#define MT7621_XTAL_25 6
-#define MT7621_XTAL_40 3
-#define MT7621_MDIO_DRV_MASK (3 << 4)
-#define MT7621_GE1_MODE_MASK (3 << 12)
-
-#define TRGMII_TXCTRL_TXC_INV BIT(30)
-#define P6ECR_INTF_MODE_RGMII BIT(1)
-#define P5RGMIIRXCR_C_ALIGN BIT(8)
-#define P5RGMIIRXCR_DELAY_2 BIT(1)
-#define P5RGMIITXCR_DELAY_2 (BIT(8) | BIT(2))
-
-/* TOP_SIG_CTRL bits */
-#define TOP_SIG_CTRL_NORMAL (BIT(17) | BIT(16))
-
-/* MHWTRAP bits */
-#define MHWTRAP_MANUAL BIT(16)
-#define MHWTRAP_P5_MAC_SEL BIT(13)
-#define MHWTRAP_P6_DIS BIT(8)
-#define MHWTRAP_P5_RGMII_MODE BIT(7)
-#define MHWTRAP_P5_DIS BIT(6)
-#define MHWTRAP_PHY_ACCESS BIT(5)
-
-/* HWTRAP bits */
-#define HWTRAP_XTAL_SHIFT 9
-#define HWTRAP_XTAL_MASK 0x3
-
-/* SYS_CTRL bits */
-#define SYS_CTRL_SW_RST BIT(1)
-#define SYS_CTRL_REG_RST BIT(0)
-
-/* PMCR bits */
-#define PMCR_IFG_XMIT_96 BIT(18)
-#define PMCR_MAC_MODE BIT(16)
-#define PMCR_FORCE_MODE BIT(15)
-#define PMCR_TX_EN BIT(14)
-#define PMCR_RX_EN BIT(13)
-#define PMCR_BACK_PRES_EN BIT(9)
-#define PMCR_BACKOFF_EN BIT(8)
-#define PMCR_TX_FC_EN BIT(5)
-#define PMCR_RX_FC_EN BIT(4)
-#define PMCR_FORCE_SPEED_1000 BIT(3)
-#define PMCR_FORCE_FDX BIT(1)
-#define PMCR_FORCE_LNK BIT(0)
-#define PMCR_FIXED_LINK (PMCR_IFG_XMIT_96 | PMCR_MAC_MODE | \
- PMCR_FORCE_MODE | PMCR_TX_EN | PMCR_RX_EN | \
- PMCR_BACK_PRES_EN | PMCR_BACKOFF_EN | \
- PMCR_FORCE_SPEED_1000 | PMCR_FORCE_FDX | \
- PMCR_FORCE_LNK)
-
-#define PMCR_FIXED_LINK_FC (PMCR_FIXED_LINK | \
- PMCR_TX_FC_EN | PMCR_RX_FC_EN)
-
-/* TRGMII control registers */
-#define GSW_INTF_MODE 0x390
-#define GSW_TRGMII_TD0_ODT 0x354
-#define GSW_TRGMII_TD1_ODT 0x35c
-#define GSW_TRGMII_TD2_ODT 0x364
-#define GSW_TRGMII_TD3_ODT 0x36c
-#define GSW_TRGMII_TXCTL_ODT 0x374
-#define GSW_TRGMII_TCK_ODT 0x37c
-#define GSW_TRGMII_RCK_CTRL 0x300
-
-#define INTF_MODE_TRGMII BIT(1)
-#define TRGMII_RCK_CTRL_RX_RST BIT(31)
-
-/* Mac control registers */
-#define MTK_MAC_P2_MCR 0x200
-#define MTK_MAC_P1_MCR 0x100
-
-#define MAC_MCR_MAX_RX_2K BIT(29)
-#define MAC_MCR_IPG_CFG (BIT(18) | BIT(16))
-#define MAC_MCR_FORCE_MODE BIT(15)
-#define MAC_MCR_TX_EN BIT(14)
-#define MAC_MCR_RX_EN BIT(13)
-#define MAC_MCR_BACKOFF_EN BIT(9)
-#define MAC_MCR_BACKPR_EN BIT(8)
-#define MAC_MCR_FORCE_RX_FC BIT(5)
-#define MAC_MCR_FORCE_TX_FC BIT(4)
-#define MAC_MCR_SPEED_1000 BIT(3)
-#define MAC_MCR_FORCE_DPX BIT(1)
-#define MAC_MCR_FORCE_LINK BIT(0)
-#define MAC_MCR_FIXED_LINK (MAC_MCR_MAX_RX_2K | MAC_MCR_IPG_CFG | \
- MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | \
- MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | \
- MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_RX_FC | \
- MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \
- MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK)
-#define MAC_MCR_FIXED_LINK_FC (MAC_MCR_MAX_RX_2K | MAC_MCR_IPG_CFG | \
- MAC_MCR_FIXED_LINK)
-
-/* possible XTAL speed */
-#define MT7623_XTAL_40 0
-#define MT7623_XTAL_20 1
-#define MT7623_XTAL_25 3
-
-/* GPIO port control registers */
-#define GPIO_OD33_CTRL8 0x4c0
-#define GPIO_BIAS_CTRL 0xed0
-#define GPIO_DRV_SEL10 0xf00
-
-/* on MT7620 the functio of port 4 can be software configured */
-enum {
- PORT4_EPHY = 0,
- PORT4_EXT,
-};
-
-/* struct mt7620_gsw - the structure that holds the SoC specific data
- * @dev: The Device struct
- * @base: The base address
- * @piac_offset: The PIAC base may change depending on SoC
- * @irq: The IRQ we are using
- * @port4: The port4 mode on MT7620
- * @autopoll: Is MDIO autopolling enabled
- * @ethsys: The ethsys register map
- * @pctl: The pin control register map
- * @clk_gsw: The switch clock
- * @clk_gp1: The gmac1 clock
- * @clk_gp2: The gmac2 clock
- * @clk_trgpll: The trgmii pll clock
- */
-struct mt7620_gsw {
- struct device *dev;
- void __iomem *base;
- u32 piac_offset;
- int irq;
- int port4;
- unsigned long int autopoll;
-
- struct regmap *ethsys;
- struct regmap *pctl;
-
- struct clk *clk_gsw;
- struct clk *clk_gp1;
- struct clk *clk_gp2;
- struct clk *clk_trgpll;
-};
-
-/* switch register I/O wrappers */
-void mtk_switch_w32(struct mt7620_gsw *gsw, u32 val, unsigned int reg);
-u32 mtk_switch_r32(struct mt7620_gsw *gsw, unsigned int reg);
-
-/* the callback used by the driver core to bringup the switch */
-int mtk_gsw_init(struct mtk_eth *eth);
-
-/* MDIO access wrappers */
-int mt7620_mdio_write(struct mii_bus *bus, int phy_addr, int phy_reg, u16 val);
-int mt7620_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg);
-void mt7620_mdio_link_adjust(struct mtk_eth *eth, int port);
-int mt7620_has_carrier(struct mtk_eth *eth);
-void mt7620_print_link_state(struct mtk_eth *eth, int port, int link,
- int speed, int duplex);
-void mt7530_mdio_w32(struct mt7620_gsw *gsw, u32 reg, u32 val);
-u32 mt7530_mdio_r32(struct mt7620_gsw *gsw, u32 reg);
-void mt7530_mdio_m32(struct mt7620_gsw *gsw, u32 mask, u32 set, u32 reg);
-
-u32 _mt7620_mii_write(struct mt7620_gsw *gsw, u32 phy_addr,
- u32 phy_register, u32 write_data);
-u32 _mt7620_mii_read(struct mt7620_gsw *gsw, int phy_addr, int phy_reg);
-void mt7620_handle_carrier(struct mtk_eth *eth);
-
-#endif
diff --git a/drivers/staging/mt7621-eth/gsw_mt7621.c b/drivers/staging/mt7621-eth/gsw_mt7621.c
deleted file mode 100644
index 53767b17bad9..000000000000
--- a/drivers/staging/mt7621-eth/gsw_mt7621.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/platform_device.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-
-#include <ralink_regs.h>
-
-#include "mtk_eth_soc.h"
-#include "gsw_mt7620.h"
-
-void mtk_switch_w32(struct mt7620_gsw *gsw, u32 val, unsigned int reg)
-{
- iowrite32(val, gsw->base + reg);
-}
-EXPORT_SYMBOL_GPL(mtk_switch_w32);
-
-u32 mtk_switch_r32(struct mt7620_gsw *gsw, unsigned int reg)
-{
- return ioread32(gsw->base + reg);
-}
-EXPORT_SYMBOL_GPL(mtk_switch_r32);
-
-static irqreturn_t gsw_interrupt_mt7621(int irq, void *_eth)
-{
- struct mtk_eth *eth = (struct mtk_eth *)_eth;
- struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv;
- u32 reg, i;
-
- reg = mt7530_mdio_r32(gsw, MT7530_SYS_INT_STS);
-
- for (i = 0; i < 5; i++) {
- unsigned int link;
-
- if ((reg & BIT(i)) == 0)
- continue;
-
- link = mt7530_mdio_r32(gsw, MT7530_PMSR_P(i)) & 0x1;
-
- if (link == eth->link[i])
- continue;
-
- eth->link[i] = link;
- if (link)
- netdev_info(*eth->netdev,
- "port %d link up\n", i);
- else
- netdev_info(*eth->netdev,
- "port %d link down\n", i);
- }
-
- mt7530_mdio_w32(gsw, MT7530_SYS_INT_STS, 0x1f);
-
- return IRQ_HANDLED;
-}
-
-static void mt7621_hw_init(struct mtk_eth *eth, struct mt7620_gsw *gsw,
- struct device_node *np)
-{
- u32 i;
- u32 val;
-
- /* hardware reset the switch */
- mtk_reset(eth, RST_CTRL_MCM);
- mdelay(10);
-
- /* reduce RGMII2 PAD driving strength */
- rt_sysc_m32(MT7621_MDIO_DRV_MASK, 0, SYSC_PAD_RGMII2_MDIO);
-
- /* gpio mux - RGMII1=Normal mode */
- rt_sysc_m32(BIT(14), 0, SYSC_GPIO_MODE);
-
- /* set GMAC1 RGMII mode */
- rt_sysc_m32(MT7621_GE1_MODE_MASK, 0, SYSC_REG_CFG1);
-
- /* enable MDIO to control MT7530 */
- rt_sysc_m32(3 << 12, 0, SYSC_GPIO_MODE);
-
- /* turn off all PHYs */
- for (i = 0; i <= 4; i++) {
- val = _mt7620_mii_read(gsw, i, 0x0);
- val |= BIT(11);
- _mt7620_mii_write(gsw, i, 0x0, val);
- }
-
- /* reset the switch */
- mt7530_mdio_w32(gsw, MT7530_SYS_CTRL,
- SYS_CTRL_SW_RST | SYS_CTRL_REG_RST);
- usleep_range(10, 20);
-
- if ((rt_sysc_r32(SYSC_REG_CHIP_REV_ID) & 0xFFFF) == 0x0101) {
- /* GE1, Force 1000M/FD, FC ON, MAX_RX_LENGTH 1536 */
- mtk_switch_w32(gsw, MAC_MCR_FIXED_LINK, MTK_MAC_P2_MCR);
- mt7530_mdio_w32(gsw, MT7530_PMCR_P(6), PMCR_FIXED_LINK);
- } else {
- /* GE1, Force 1000M/FD, FC ON, MAX_RX_LENGTH 1536 */
- mtk_switch_w32(gsw, MAC_MCR_FIXED_LINK_FC, MTK_MAC_P1_MCR);
- mt7530_mdio_w32(gsw, MT7530_PMCR_P(6), PMCR_FIXED_LINK_FC);
- }
-
- /* GE2, Link down */
- mtk_switch_w32(gsw, MAC_MCR_FORCE_MODE, MTK_MAC_P2_MCR);
-
- /* Enable Port 6, P5 as GMAC5, P5 disable */
- val = mt7530_mdio_r32(gsw, MT7530_MHWTRAP);
- /* Enable Port 6 */
- val &= ~MHWTRAP_P6_DIS;
- /* Disable Port 5 */
- val |= MHWTRAP_P5_DIS;
- /* manual override of HW-Trap */
- val |= MHWTRAP_MANUAL;
- mt7530_mdio_w32(gsw, MT7530_MHWTRAP, val);
-
- val = rt_sysc_r32(SYSC_REG_CFG);
- val = (val >> MT7621_XTAL_SHIFT) & MT7621_XTAL_MASK;
- if (val < MT7621_XTAL_25 && val >= MT7621_XTAL_40) {
- /* 40Mhz */
-
- /* disable MT7530 core clock */
- _mt7620_mii_write(gsw, 0, 13, 0x1f);
- _mt7620_mii_write(gsw, 0, 14, 0x410);
- _mt7620_mii_write(gsw, 0, 13, 0x401f);
- _mt7620_mii_write(gsw, 0, 14, 0x0);
-
- /* disable MT7530 PLL */
- _mt7620_mii_write(gsw, 0, 13, 0x1f);
- _mt7620_mii_write(gsw, 0, 14, 0x40d);
- _mt7620_mii_write(gsw, 0, 13, 0x401f);
- _mt7620_mii_write(gsw, 0, 14, 0x2020);
-
- /* for MT7530 core clock = 500Mhz */
- _mt7620_mii_write(gsw, 0, 13, 0x1f);
- _mt7620_mii_write(gsw, 0, 14, 0x40e);
- _mt7620_mii_write(gsw, 0, 13, 0x401f);
- _mt7620_mii_write(gsw, 0, 14, 0x119);
-
- /* enable MT7530 PLL */
- _mt7620_mii_write(gsw, 0, 13, 0x1f);
- _mt7620_mii_write(gsw, 0, 14, 0x40d);
- _mt7620_mii_write(gsw, 0, 13, 0x401f);
- _mt7620_mii_write(gsw, 0, 14, 0x2820);
-
- usleep_range(20, 40);
-
- /* enable MT7530 core clock */
- _mt7620_mii_write(gsw, 0, 13, 0x1f);
- _mt7620_mii_write(gsw, 0, 14, 0x410);
- _mt7620_mii_write(gsw, 0, 13, 0x401f);
- }
-
- /* RGMII */
- _mt7620_mii_write(gsw, 0, 14, 0x1);
-
- /* set MT7530 central align */
- mt7530_mdio_m32(gsw, BIT(0), P6ECR_INTF_MODE_RGMII, MT7530_P6ECR);
- mt7530_mdio_m32(gsw, TRGMII_TXCTRL_TXC_INV, 0,
- MT7530_TRGMII_TXCTRL);
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TCK_CTRL, 0x855);
-
- /* delay setting for 10/1000M */
- mt7530_mdio_w32(gsw, MT7530_P5RGMIIRXCR,
- P5RGMIIRXCR_C_ALIGN | P5RGMIIRXCR_DELAY_2);
- mt7530_mdio_w32(gsw, MT7530_P5RGMIITXCR, 0x14);
-
- /* lower Tx Driving*/
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TD0_ODT, 0x44);
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TD1_ODT, 0x44);
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TD2_ODT, 0x44);
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TD3_ODT, 0x44);
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TD4_ODT, 0x44);
- mt7530_mdio_w32(gsw, MT7530_TRGMII_TD5_ODT, 0x44);
-
- /* turn on all PHYs */
- for (i = 0; i <= 4; i++) {
- val = _mt7620_mii_read(gsw, i, 0);
- val &= ~BIT(11);
- _mt7620_mii_write(gsw, i, 0, val);
- }
-
-#define MT7530_NUM_PORTS 8
-#define REG_ESW_PORT_PCR(x) (0x2004 | ((x) << 8))
-#define REG_ESW_PORT_PVC(x) (0x2010 | ((x) << 8))
-#define REG_ESW_PORT_PPBV1(x) (0x2014 | ((x) << 8))
-#define MT7530_CPU_PORT 6
-
- /* This is copied from mt7530_apply_config in libreCMC driver */
- {
- int i;
-
- for (i = 0; i < MT7530_NUM_PORTS; i++)
- mt7530_mdio_w32(gsw, REG_ESW_PORT_PCR(i), 0x00400000);
-
- mt7530_mdio_w32(gsw, REG_ESW_PORT_PCR(MT7530_CPU_PORT),
- 0x00ff0000);
-
- for (i = 0; i < MT7530_NUM_PORTS; i++)
- mt7530_mdio_w32(gsw, REG_ESW_PORT_PVC(i), 0x810000c0);
- }
-
- /* enable irq */
- mt7530_mdio_m32(gsw, 0, 3 << 16, MT7530_TOP_SIG_CTRL);
- mt7530_mdio_w32(gsw, MT7530_SYS_INT_EN, 0x1f);
-}
-
-static const struct of_device_id mediatek_gsw_match[] = {
- { .compatible = "mediatek,mt7621-gsw" },
- {},
-};
-MODULE_DEVICE_TABLE(of, mediatek_gsw_match);
-
-int mtk_gsw_init(struct mtk_eth *eth)
-{
- struct device_node *np = eth->switch_np;
- struct platform_device *pdev = of_find_device_by_node(np);
- struct mt7620_gsw *gsw;
-
- if (!pdev)
- return -ENODEV;
-
- if (!of_device_is_compatible(np, mediatek_gsw_match->compatible))
- return -EINVAL;
-
- gsw = platform_get_drvdata(pdev);
- eth->sw_priv = gsw;
-
- if (!gsw->irq)
- return -EINVAL;
-
- request_irq(gsw->irq, gsw_interrupt_mt7621, 0,
- "gsw", eth);
- disable_irq(gsw->irq);
-
- mt7621_hw_init(eth, gsw, np);
-
- enable_irq(gsw->irq);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mtk_gsw_init);
-
-static int mt7621_gsw_probe(struct platform_device *pdev)
-{
- struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- struct mt7620_gsw *gsw;
-
- gsw = devm_kzalloc(&pdev->dev, sizeof(struct mt7620_gsw), GFP_KERNEL);
- if (!gsw)
- return -ENOMEM;
-
- gsw->base = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(gsw->base))
- return PTR_ERR(gsw->base);
-
- gsw->dev = &pdev->dev;
- gsw->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
-
- platform_set_drvdata(pdev, gsw);
-
- return 0;
-}
-
-static int mt7621_gsw_remove(struct platform_device *pdev)
-{
- platform_set_drvdata(pdev, NULL);
-
- return 0;
-}
-
-static struct platform_driver gsw_driver = {
- .probe = mt7621_gsw_probe,
- .remove = mt7621_gsw_remove,
- .driver = {
- .name = "mt7621-gsw",
- .of_match_table = mediatek_gsw_match,
- },
-};
-
-module_platform_driver(gsw_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
-MODULE_DESCRIPTION("GBit switch driver for Mediatek MT7621 SoC");
diff --git a/drivers/staging/mt7621-eth/mdio.c b/drivers/staging/mt7621-eth/mdio.c
deleted file mode 100644
index 5fea6a447eed..000000000000
--- a/drivers/staging/mt7621-eth/mdio.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/phy.h>
-#include <linux/of_net.h>
-#include <linux/of_mdio.h>
-
-#include "mtk_eth_soc.h"
-#include "mdio.h"
-
-static int mtk_mdio_reset(struct mii_bus *bus)
-{
- /* TODO */
- return 0;
-}
-
-static void mtk_phy_link_adjust(struct net_device *dev)
-{
- struct mtk_eth *eth = netdev_priv(dev);
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&eth->phy->lock, flags);
- for (i = 0; i < 8; i++) {
- if (eth->phy->phy_node[i]) {
- struct phy_device *phydev = eth->phy->phy[i];
- int status_change = 0;
-
- if (phydev->link)
- if (eth->phy->duplex[i] != phydev->duplex ||
- eth->phy->speed[i] != phydev->speed)
- status_change = 1;
-
- if (phydev->link != eth->link[i])
- status_change = 1;
-
- switch (phydev->speed) {
- case SPEED_1000:
- case SPEED_100:
- case SPEED_10:
- eth->link[i] = phydev->link;
- eth->phy->duplex[i] = phydev->duplex;
- eth->phy->speed[i] = phydev->speed;
-
- if (status_change &&
- eth->soc->mdio_adjust_link)
- eth->soc->mdio_adjust_link(eth, i);
- break;
- }
- }
- }
- spin_unlock_irqrestore(&eth->phy->lock, flags);
-}
-
-int mtk_connect_phy_node(struct mtk_eth *eth, struct mtk_mac *mac,
- struct device_node *phy_node)
-{
- const __be32 *_port = NULL;
- struct phy_device *phydev;
- int phy_mode, port;
-
- _port = of_get_property(phy_node, "reg", NULL);
-
- if (!_port || (be32_to_cpu(*_port) >= 0x20)) {
- pr_err("%pOFn: invalid port id\n", phy_node);
- return -EINVAL;
- }
- port = be32_to_cpu(*_port);
- phy_mode = of_get_phy_mode(phy_node);
- if (phy_mode < 0) {
- dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
- eth->phy->phy_node[port] = NULL;
- return -EINVAL;
- }
-
- phydev = of_phy_connect(eth->netdev[mac->id], phy_node,
- mtk_phy_link_adjust, 0, phy_mode);
- if (!phydev) {
- dev_err(eth->dev, "could not connect to PHY\n");
- eth->phy->phy_node[port] = NULL;
- return -ENODEV;
- }
-
- phydev->supported &= PHY_1000BT_FEATURES;
- phydev->advertising = phydev->supported;
-
- dev_info(eth->dev,
- "connected port %d to PHY at %s [uid=%08x, driver=%s]\n",
- port, phydev_name(phydev), phydev->phy_id,
- phydev->drv->name);
-
- eth->phy->phy[port] = phydev;
- eth->link[port] = 0;
-
- return 0;
-}
-
-static void phy_init(struct mtk_eth *eth, struct mtk_mac *mac,
- struct phy_device *phy)
-{
- phy_attach(eth->netdev[mac->id], phydev_name(phy),
- PHY_INTERFACE_MODE_MII);
-
- phy->autoneg = AUTONEG_ENABLE;
- phy->speed = 0;
- phy->duplex = 0;
- phy_set_max_speed(phy, SPEED_100);
- phy->advertising = phy->supported | ADVERTISED_Autoneg;
-
- phy_start_aneg(phy);
-}
-
-static int mtk_phy_connect(struct mtk_mac *mac)
-{
- struct mtk_eth *eth = mac->hw;
- int i;
-
- for (i = 0; i < 8; i++) {
- if (eth->phy->phy_node[i]) {
- if (!mac->phy_dev) {
- mac->phy_dev = eth->phy->phy[i];
- mac->phy_flags = MTK_PHY_FLAG_PORT;
- }
- } else if (eth->mii_bus) {
- struct phy_device *phy;
-
- phy = mdiobus_get_phy(eth->mii_bus, i);
- if (phy) {
- phy_init(eth, mac, phy);
- if (!mac->phy_dev) {
- mac->phy_dev = phy;
- mac->phy_flags = MTK_PHY_FLAG_ATTACH;
- }
- }
- }
- }
-
- return 0;
-}
-
-static void mtk_phy_disconnect(struct mtk_mac *mac)
-{
- struct mtk_eth *eth = mac->hw;
- unsigned long flags;
- int i;
-
- for (i = 0; i < 8; i++)
- if (eth->phy->phy_fixed[i]) {
- spin_lock_irqsave(&eth->phy->lock, flags);
- eth->link[i] = 0;
- if (eth->soc->mdio_adjust_link)
- eth->soc->mdio_adjust_link(eth, i);
- spin_unlock_irqrestore(&eth->phy->lock, flags);
- } else if (eth->phy->phy[i]) {
- phy_disconnect(eth->phy->phy[i]);
- } else if (eth->mii_bus) {
- struct phy_device *phy =
- mdiobus_get_phy(eth->mii_bus, i);
-
- if (phy)
- phy_detach(phy);
- }
-}
-
-static void mtk_phy_start(struct mtk_mac *mac)
-{
- struct mtk_eth *eth = mac->hw;
- unsigned long flags;
- int i;
-
- for (i = 0; i < 8; i++) {
- if (eth->phy->phy_fixed[i]) {
- spin_lock_irqsave(&eth->phy->lock, flags);
- eth->link[i] = 1;
- if (eth->soc->mdio_adjust_link)
- eth->soc->mdio_adjust_link(eth, i);
- spin_unlock_irqrestore(&eth->phy->lock, flags);
- } else if (eth->phy->phy[i]) {
- phy_start(eth->phy->phy[i]);
- }
- }
-}
-
-static void mtk_phy_stop(struct mtk_mac *mac)
-{
- struct mtk_eth *eth = mac->hw;
- unsigned long flags;
- int i;
-
- for (i = 0; i < 8; i++)
- if (eth->phy->phy_fixed[i]) {
- spin_lock_irqsave(&eth->phy->lock, flags);
- eth->link[i] = 0;
- if (eth->soc->mdio_adjust_link)
- eth->soc->mdio_adjust_link(eth, i);
- spin_unlock_irqrestore(&eth->phy->lock, flags);
- } else if (eth->phy->phy[i]) {
- phy_stop(eth->phy->phy[i]);
- }
-}
-
-static struct mtk_phy phy_ralink = {
- .connect = mtk_phy_connect,
- .disconnect = mtk_phy_disconnect,
- .start = mtk_phy_start,
- .stop = mtk_phy_stop,
-};
-
-int mtk_mdio_init(struct mtk_eth *eth)
-{
- struct device_node *mii_np;
- int err;
-
- if (!eth->soc->mdio_read || !eth->soc->mdio_write)
- return 0;
-
- spin_lock_init(&phy_ralink.lock);
- eth->phy = &phy_ralink;
-
- mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
- if (!mii_np) {
- dev_err(eth->dev, "no %s child node found", "mdio-bus");
- return -ENODEV;
- }
-
- if (!of_device_is_available(mii_np)) {
- err = 0;
- goto err_put_node;
- }
-
- eth->mii_bus = mdiobus_alloc();
- if (!eth->mii_bus) {
- err = -ENOMEM;
- goto err_put_node;
- }
-
- eth->mii_bus->name = "mdio";
- eth->mii_bus->read = eth->soc->mdio_read;
- eth->mii_bus->write = eth->soc->mdio_write;
- eth->mii_bus->reset = mtk_mdio_reset;
- eth->mii_bus->priv = eth;
- eth->mii_bus->parent = eth->dev;
-
- snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%pOFn", mii_np);
- err = of_mdiobus_register(eth->mii_bus, mii_np);
- if (err)
- goto err_free_bus;
-
- return 0;
-
-err_free_bus:
- kfree(eth->mii_bus);
-err_put_node:
- of_node_put(mii_np);
- eth->mii_bus = NULL;
- return err;
-}
-
-void mtk_mdio_cleanup(struct mtk_eth *eth)
-{
- if (!eth->mii_bus)
- return;
-
- mdiobus_unregister(eth->mii_bus);
- of_node_put(eth->mii_bus->dev.of_node);
- kfree(eth->mii_bus);
-}
diff --git a/drivers/staging/mt7621-eth/mdio.h b/drivers/staging/mt7621-eth/mdio.h
deleted file mode 100644
index b14e23842a01..000000000000
--- a/drivers/staging/mt7621-eth/mdio.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#ifndef _RALINK_MDIO_H__
-#define _RALINK_MDIO_H__
-
-#ifdef CONFIG_NET_MEDIATEK_MDIO
-int mtk_mdio_init(struct mtk_eth *eth);
-void mtk_mdio_cleanup(struct mtk_eth *eth);
-int mtk_connect_phy_node(struct mtk_eth *eth, struct mtk_mac *mac,
- struct device_node *phy_node);
-#else
-static inline int mtk_mdio_init(struct mtk_eth *eth) { return 0; }
-static inline void mtk_mdio_cleanup(struct mtk_eth *eth) {}
-#endif
-#endif
diff --git a/drivers/staging/mt7621-eth/mdio_mt7620.c b/drivers/staging/mt7621-eth/mdio_mt7620.c
deleted file mode 100644
index ced605c2914e..000000000000
--- a/drivers/staging/mt7621-eth/mdio_mt7620.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-
-#include "mtk_eth_soc.h"
-#include "gsw_mt7620.h"
-#include "mdio.h"
-
-static int mt7620_mii_busy_wait(struct mt7620_gsw *gsw)
-{
- unsigned long t_start = jiffies;
-
- while (1) {
- if (!(mtk_switch_r32(gsw,
- gsw->piac_offset + MT7620_GSW_REG_PIAC) &
- GSW_MDIO_ACCESS))
- return 0;
- if (time_after(jiffies, t_start + GSW_REG_PHY_TIMEOUT))
- break;
- }
-
- dev_err(gsw->dev, "mdio: MDIO timeout\n");
- return -1;
-}
-
-u32 _mt7620_mii_write(struct mt7620_gsw *gsw, u32 phy_addr,
- u32 phy_register, u32 write_data)
-{
- if (mt7620_mii_busy_wait(gsw))
- return -1;
-
- write_data &= 0xffff;
-
- mtk_switch_w32(gsw, GSW_MDIO_ACCESS | GSW_MDIO_START | GSW_MDIO_WRITE |
- (phy_register << GSW_MDIO_REG_SHIFT) |
- (phy_addr << GSW_MDIO_ADDR_SHIFT) | write_data,
- MT7620_GSW_REG_PIAC);
-
- if (mt7620_mii_busy_wait(gsw))
- return -1;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(_mt7620_mii_write);
-
-u32 _mt7620_mii_read(struct mt7620_gsw *gsw, int phy_addr, int phy_reg)
-{
- u32 d;
-
- if (mt7620_mii_busy_wait(gsw))
- return 0xffff;
-
- mtk_switch_w32(gsw, GSW_MDIO_ACCESS | GSW_MDIO_START | GSW_MDIO_READ |
- (phy_reg << GSW_MDIO_REG_SHIFT) |
- (phy_addr << GSW_MDIO_ADDR_SHIFT),
- MT7620_GSW_REG_PIAC);
-
- if (mt7620_mii_busy_wait(gsw))
- return 0xffff;
-
- d = mtk_switch_r32(gsw, MT7620_GSW_REG_PIAC) & 0xffff;
-
- return d;
-}
-EXPORT_SYMBOL_GPL(_mt7620_mii_read);
-
-int mt7620_mdio_write(struct mii_bus *bus, int phy_addr, int phy_reg, u16 val)
-{
- struct mtk_eth *eth = bus->priv;
- struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv;
-
- return _mt7620_mii_write(gsw, phy_addr, phy_reg, val);
-}
-
-int mt7620_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
-{
- struct mtk_eth *eth = bus->priv;
- struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv;
-
- return _mt7620_mii_read(gsw, phy_addr, phy_reg);
-}
-
-void mt7530_mdio_w32(struct mt7620_gsw *gsw, u32 reg, u32 val)
-{
- _mt7620_mii_write(gsw, 0x1f, 0x1f, (reg >> 6) & 0x3ff);
- _mt7620_mii_write(gsw, 0x1f, (reg >> 2) & 0xf, val & 0xffff);
- _mt7620_mii_write(gsw, 0x1f, 0x10, val >> 16);
-}
-EXPORT_SYMBOL_GPL(mt7530_mdio_w32);
-
-u32 mt7530_mdio_r32(struct mt7620_gsw *gsw, u32 reg)
-{
- u16 high, low;
-
- _mt7620_mii_write(gsw, 0x1f, 0x1f, (reg >> 6) & 0x3ff);
- low = _mt7620_mii_read(gsw, 0x1f, (reg >> 2) & 0xf);
- high = _mt7620_mii_read(gsw, 0x1f, 0x10);
-
- return (high << 16) | (low & 0xffff);
-}
-EXPORT_SYMBOL_GPL(mt7530_mdio_r32);
-
-void mt7530_mdio_m32(struct mt7620_gsw *gsw, u32 mask, u32 set, u32 reg)
-{
- u32 val = mt7530_mdio_r32(gsw, reg);
-
- val &= ~mask;
- val |= set;
- mt7530_mdio_w32(gsw, reg, val);
-}
-EXPORT_SYMBOL_GPL(mt7530_mdio_m32);
-
-static unsigned char *mtk_speed_str(int speed)
-{
- switch (speed) {
- case 2:
- case SPEED_1000:
- return "1000";
- case 1:
- case SPEED_100:
- return "100";
- case 0:
- case SPEED_10:
- return "10";
- }
-
- return "? ";
-}
-
-int mt7620_has_carrier(struct mtk_eth *eth)
-{
- struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv;
- int i;
-
- for (i = 0; i < GSW_PORT6; i++)
- if (mt7530_mdio_r32(gsw, GSW_REG_PORT_STATUS(i)) & 0x1)
- return 1;
- return 0;
-}
-
-void mt7620_print_link_state(struct mtk_eth *eth, int port, int link,
- int speed, int duplex)
-{
- struct mt7620_gsw *gsw = eth->sw_priv;
-
- if (link)
- dev_info(gsw->dev, "port %d link up (%sMbps/%s duplex)\n",
- port, mtk_speed_str(speed),
- (duplex) ? "Full" : "Half");
- else
- dev_info(gsw->dev, "port %d link down\n", port);
-}
-
-void mt7620_mdio_link_adjust(struct mtk_eth *eth, int port)
-{
- mt7620_print_link_state(eth, port, eth->link[port],
- eth->phy->speed[port],
- (eth->phy->duplex[port] == DUPLEX_FULL));
-}
diff --git a/drivers/staging/mt7621-eth/mtk_eth_soc.c b/drivers/staging/mt7621-eth/mtk_eth_soc.c
deleted file mode 100644
index 6027b19f7bc2..000000000000
--- a/drivers/staging/mt7621-eth/mtk_eth_soc.c
+++ /dev/null
@@ -1,2176 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/platform_device.h>
-#include <linux/of_device.h>
-#include <linux/mfd/syscon.h>
-#include <linux/clk.h>
-#include <linux/of_net.h>
-#include <linux/of_mdio.h>
-#include <linux/if_vlan.h>
-#include <linux/reset.h>
-#include <linux/tcp.h>
-#include <linux/io.h>
-#include <linux/bug.h>
-#include <linux/regmap.h>
-
-#include "mtk_eth_soc.h"
-#include "mdio.h"
-#include "ethtool.h"
-
-#define MAX_RX_LENGTH 1536
-#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
-#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
-#define DMA_DUMMY_DESC 0xffffffff
-#define MTK_DEFAULT_MSG_ENABLE \
- (NETIF_MSG_DRV | \
- NETIF_MSG_PROBE | \
- NETIF_MSG_LINK | \
- NETIF_MSG_TIMER | \
- NETIF_MSG_IFDOWN | \
- NETIF_MSG_IFUP | \
- NETIF_MSG_RX_ERR | \
- NETIF_MSG_TX_ERR)
-
-#define TX_DMA_DESP2_DEF (TX_DMA_LS0 | TX_DMA_DONE)
-#define NEXT_TX_DESP_IDX(X) (((X) + 1) & (ring->tx_ring_size - 1))
-#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (ring->rx_ring_size - 1))
-
-#define SYSC_REG_RSTCTRL 0x34
-
-static int mtk_msg_level = -1;
-module_param_named(msg_level, mtk_msg_level, int, 0);
-MODULE_PARM_DESC(msg_level, "Message level (-1=defaults,0=none,...,16=all)");
-
-static const u16 mtk_reg_table_default[MTK_REG_COUNT] = {
- [MTK_REG_PDMA_GLO_CFG] = MTK_PDMA_GLO_CFG,
- [MTK_REG_PDMA_RST_CFG] = MTK_PDMA_RST_CFG,
- [MTK_REG_DLY_INT_CFG] = MTK_DLY_INT_CFG,
- [MTK_REG_TX_BASE_PTR0] = MTK_TX_BASE_PTR0,
- [MTK_REG_TX_MAX_CNT0] = MTK_TX_MAX_CNT0,
- [MTK_REG_TX_CTX_IDX0] = MTK_TX_CTX_IDX0,
- [MTK_REG_TX_DTX_IDX0] = MTK_TX_DTX_IDX0,
- [MTK_REG_RX_BASE_PTR0] = MTK_RX_BASE_PTR0,
- [MTK_REG_RX_MAX_CNT0] = MTK_RX_MAX_CNT0,
- [MTK_REG_RX_CALC_IDX0] = MTK_RX_CALC_IDX0,
- [MTK_REG_RX_DRX_IDX0] = MTK_RX_DRX_IDX0,
- [MTK_REG_MTK_INT_ENABLE] = MTK_INT_ENABLE,
- [MTK_REG_MTK_INT_STATUS] = MTK_INT_STATUS,
- [MTK_REG_MTK_DMA_VID_BASE] = MTK_DMA_VID0,
- [MTK_REG_MTK_COUNTER_BASE] = MTK_GDMA1_TX_GBCNT,
- [MTK_REG_MTK_RST_GL] = MTK_RST_GL,
-};
-
-static const u16 *mtk_reg_table = mtk_reg_table_default;
-
-void mtk_w32(struct mtk_eth *eth, u32 val, unsigned int reg)
-{
- __raw_writel(val, eth->base + reg);
-}
-
-u32 mtk_r32(struct mtk_eth *eth, unsigned int reg)
-{
- return __raw_readl(eth->base + reg);
-}
-
-static void mtk_reg_w32(struct mtk_eth *eth, u32 val, enum mtk_reg reg)
-{
- mtk_w32(eth, val, mtk_reg_table[reg]);
-}
-
-static u32 mtk_reg_r32(struct mtk_eth *eth, enum mtk_reg reg)
-{
- return mtk_r32(eth, mtk_reg_table[reg]);
-}
-
-/* these bits are also exposed via the reset-controller API. however the switch
- * and FE need to be brought out of reset in the exakt same moemtn and the
- * reset-controller api does not provide this feature yet. Do the reset manually
- * until we fixed the reset-controller api to be able to do this
- */
-void mtk_reset(struct mtk_eth *eth, u32 reset_bits)
-{
- u32 val;
-
- regmap_read(eth->ethsys, SYSC_REG_RSTCTRL, &val);
- val |= reset_bits;
- regmap_write(eth->ethsys, SYSC_REG_RSTCTRL, val);
- usleep_range(10, 20);
- val &= ~reset_bits;
- regmap_write(eth->ethsys, SYSC_REG_RSTCTRL, val);
- usleep_range(10, 20);
-}
-EXPORT_SYMBOL(mtk_reset);
-
-static inline void mtk_irq_ack(struct mtk_eth *eth, u32 mask)
-{
- if (eth->soc->dma_type & MTK_PDMA)
- mtk_reg_w32(eth, mask, MTK_REG_MTK_INT_STATUS);
- if (eth->soc->dma_type & MTK_QDMA)
- mtk_w32(eth, mask, MTK_QMTK_INT_STATUS);
-}
-
-static inline u32 mtk_irq_pending(struct mtk_eth *eth)
-{
- u32 status = 0;
-
- if (eth->soc->dma_type & MTK_PDMA)
- status |= mtk_reg_r32(eth, MTK_REG_MTK_INT_STATUS);
- if (eth->soc->dma_type & MTK_QDMA)
- status |= mtk_r32(eth, MTK_QMTK_INT_STATUS);
-
- return status;
-}
-
-static void mtk_irq_ack_status(struct mtk_eth *eth, u32 mask)
-{
- u32 status_reg = MTK_REG_MTK_INT_STATUS;
-
- if (mtk_reg_table[MTK_REG_MTK_INT_STATUS2])
- status_reg = MTK_REG_MTK_INT_STATUS2;
-
- mtk_reg_w32(eth, mask, status_reg);
-}
-
-static u32 mtk_irq_pending_status(struct mtk_eth *eth)
-{
- u32 status_reg = MTK_REG_MTK_INT_STATUS;
-
- if (mtk_reg_table[MTK_REG_MTK_INT_STATUS2])
- status_reg = MTK_REG_MTK_INT_STATUS2;
-
- return mtk_reg_r32(eth, status_reg);
-}
-
-static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
-{
- u32 val;
-
- if (eth->soc->dma_type & MTK_PDMA) {
- val = mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE);
- mtk_reg_w32(eth, val & ~mask, MTK_REG_MTK_INT_ENABLE);
- /* flush write */
- mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE);
- }
- if (eth->soc->dma_type & MTK_QDMA) {
- val = mtk_r32(eth, MTK_QMTK_INT_ENABLE);
- mtk_w32(eth, val & ~mask, MTK_QMTK_INT_ENABLE);
- /* flush write */
- mtk_r32(eth, MTK_QMTK_INT_ENABLE);
- }
-}
-
-static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
-{
- u32 val;
-
- if (eth->soc->dma_type & MTK_PDMA) {
- val = mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE);
- mtk_reg_w32(eth, val | mask, MTK_REG_MTK_INT_ENABLE);
- /* flush write */
- mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE);
- }
- if (eth->soc->dma_type & MTK_QDMA) {
- val = mtk_r32(eth, MTK_QMTK_INT_ENABLE);
- mtk_w32(eth, val | mask, MTK_QMTK_INT_ENABLE);
- /* flush write */
- mtk_r32(eth, MTK_QMTK_INT_ENABLE);
- }
-}
-
-static inline u32 mtk_irq_enabled(struct mtk_eth *eth)
-{
- u32 enabled = 0;
-
- if (eth->soc->dma_type & MTK_PDMA)
- enabled |= mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE);
- if (eth->soc->dma_type & MTK_QDMA)
- enabled |= mtk_r32(eth, MTK_QMTK_INT_ENABLE);
-
- return enabled;
-}
-
-static inline void mtk_hw_set_macaddr(struct mtk_mac *mac,
- unsigned char *macaddr)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&mac->hw->page_lock, flags);
- mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1], MTK_GDMA1_MAC_ADRH);
- mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
- (macaddr[4] << 8) | macaddr[5],
- MTK_GDMA1_MAC_ADRL);
- spin_unlock_irqrestore(&mac->hw->page_lock, flags);
-}
-
-static int mtk_set_mac_address(struct net_device *dev, void *p)
-{
- int ret = eth_mac_addr(dev, p);
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
-
- if (ret)
- return ret;
-
- if (eth->soc->set_mac)
- eth->soc->set_mac(mac, dev->dev_addr);
- else
- mtk_hw_set_macaddr(mac, p);
-
- return 0;
-}
-
-static inline int mtk_max_frag_size(int mtu)
-{
- /* make sure buf_size will be at least MAX_RX_LENGTH */
- if (mtu + MTK_RX_ETH_HLEN < MAX_RX_LENGTH)
- mtu = MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
-
- return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-}
-
-static inline int mtk_max_buf_size(int frag_size)
-{
- int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
- WARN_ON(buf_size < MAX_RX_LENGTH);
-
- return buf_size;
-}
-
-static inline void mtk_get_rxd(struct mtk_rx_dma *rxd,
- struct mtk_rx_dma *dma_rxd)
-{
- rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
- rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
- rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
- rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
-}
-
-static inline void mtk_set_txd_pdma(struct mtk_tx_dma *txd,
- struct mtk_tx_dma *dma_txd)
-{
- WRITE_ONCE(dma_txd->txd1, txd->txd1);
- WRITE_ONCE(dma_txd->txd3, txd->txd3);
- WRITE_ONCE(dma_txd->txd4, txd->txd4);
- /* clean dma done flag last */
- WRITE_ONCE(dma_txd->txd2, txd->txd2);
-}
-
-static void mtk_clean_rx(struct mtk_eth *eth, struct mtk_rx_ring *ring)
-{
- int i;
-
- if (ring->rx_data && ring->rx_dma) {
- for (i = 0; i < ring->rx_ring_size; i++) {
- if (!ring->rx_data[i])
- continue;
- if (!ring->rx_dma[i].rxd1)
- continue;
- dma_unmap_single(eth->dev,
- ring->rx_dma[i].rxd1,
- ring->rx_buf_size,
- DMA_FROM_DEVICE);
- skb_free_frag(ring->rx_data[i]);
- }
- kfree(ring->rx_data);
- ring->rx_data = NULL;
- }
-
- if (ring->rx_dma) {
- dma_free_coherent(eth->dev,
- ring->rx_ring_size * sizeof(*ring->rx_dma),
- ring->rx_dma,
- ring->rx_phys);
- ring->rx_dma = NULL;
- }
-}
-
-static int mtk_dma_rx_alloc(struct mtk_eth *eth, struct mtk_rx_ring *ring)
-{
- int i, pad = 0;
-
- ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
- ring->rx_buf_size = mtk_max_buf_size(ring->frag_size);
- ring->rx_ring_size = eth->soc->dma_ring_size;
- ring->rx_data = kcalloc(ring->rx_ring_size, sizeof(*ring->rx_data),
- GFP_KERNEL);
- if (!ring->rx_data)
- goto no_rx_mem;
-
- for (i = 0; i < ring->rx_ring_size; i++) {
- ring->rx_data[i] = netdev_alloc_frag(ring->frag_size);
- if (!ring->rx_data[i])
- goto no_rx_mem;
- }
-
- ring->rx_dma =
- dma_alloc_coherent(eth->dev,
- ring->rx_ring_size * sizeof(*ring->rx_dma),
- &ring->rx_phys, GFP_ATOMIC | __GFP_ZERO);
- if (!ring->rx_dma)
- goto no_rx_mem;
-
- if (!eth->soc->rx_2b_offset)
- pad = NET_IP_ALIGN;
-
- for (i = 0; i < ring->rx_ring_size; i++) {
- dma_addr_t dma_addr = dma_map_single(eth->dev,
- ring->rx_data[i] + NET_SKB_PAD + pad,
- ring->rx_buf_size,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
- goto no_rx_mem;
- ring->rx_dma[i].rxd1 = (unsigned int)dma_addr;
-
- if (eth->soc->rx_sg_dma)
- ring->rx_dma[i].rxd2 = RX_DMA_PLEN0(ring->rx_buf_size);
- else
- ring->rx_dma[i].rxd2 = RX_DMA_LSO;
- }
- ring->rx_calc_idx = ring->rx_ring_size - 1;
- /* make sure that all changes to the dma ring are flushed before we
- * continue
- */
- wmb();
-
- return 0;
-
-no_rx_mem:
- return -ENOMEM;
-}
-
-static void mtk_txd_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
-{
- if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
- dma_unmap_single(dev,
- dma_unmap_addr(tx_buf, dma_addr0),
- dma_unmap_len(tx_buf, dma_len0),
- DMA_TO_DEVICE);
- } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
- dma_unmap_page(dev,
- dma_unmap_addr(tx_buf, dma_addr0),
- dma_unmap_len(tx_buf, dma_len0),
- DMA_TO_DEVICE);
- }
- if (tx_buf->flags & MTK_TX_FLAGS_PAGE1)
- dma_unmap_page(dev,
- dma_unmap_addr(tx_buf, dma_addr1),
- dma_unmap_len(tx_buf, dma_len1),
- DMA_TO_DEVICE);
-
- tx_buf->flags = 0;
- if (tx_buf->skb && (tx_buf->skb != (struct sk_buff *)DMA_DUMMY_DESC))
- dev_kfree_skb_any(tx_buf->skb);
- tx_buf->skb = NULL;
-}
-
-static void mtk_pdma_tx_clean(struct mtk_eth *eth)
-{
- struct mtk_tx_ring *ring = &eth->tx_ring;
- int i;
-
- if (ring->tx_buf) {
- for (i = 0; i < ring->tx_ring_size; i++)
- mtk_txd_unmap(eth->dev, &ring->tx_buf[i]);
- kfree(ring->tx_buf);
- ring->tx_buf = NULL;
- }
-
- if (ring->tx_dma) {
- dma_free_coherent(eth->dev,
- ring->tx_ring_size * sizeof(*ring->tx_dma),
- ring->tx_dma,
- ring->tx_phys);
- ring->tx_dma = NULL;
- }
-}
-
-static void mtk_qdma_tx_clean(struct mtk_eth *eth)
-{
- struct mtk_tx_ring *ring = &eth->tx_ring;
- int i;
-
- if (ring->tx_buf) {
- for (i = 0; i < ring->tx_ring_size; i++)
- mtk_txd_unmap(eth->dev, &ring->tx_buf[i]);
- kfree(ring->tx_buf);
- ring->tx_buf = NULL;
- }
-
- if (ring->tx_dma) {
- dma_free_coherent(eth->dev,
- ring->tx_ring_size * sizeof(*ring->tx_dma),
- ring->tx_dma,
- ring->tx_phys);
- ring->tx_dma = NULL;
- }
-}
-
-void mtk_stats_update_mac(struct mtk_mac *mac)
-{
- struct mtk_hw_stats *hw_stats = mac->hw_stats;
- unsigned int base = mtk_reg_table[MTK_REG_MTK_COUNTER_BASE];
- u64 stats;
-
- base += hw_stats->reg_offset;
-
- u64_stats_update_begin(&hw_stats->syncp);
-
- if (mac->hw->soc->new_stats) {
- hw_stats->rx_bytes += mtk_r32(mac->hw, base);
- stats = mtk_r32(mac->hw, base + 0x04);
- if (stats)
- hw_stats->rx_bytes += (stats << 32);
- hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08);
- hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10);
- hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14);
- hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18);
- hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c);
- hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20);
- hw_stats->rx_flow_control_packets +=
- mtk_r32(mac->hw, base + 0x24);
- hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28);
- hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c);
- hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30);
- stats = mtk_r32(mac->hw, base + 0x34);
- if (stats)
- hw_stats->tx_bytes += (stats << 32);
- hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38);
- } else {
- hw_stats->tx_bytes += mtk_r32(mac->hw, base);
- hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x04);
- hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x08);
- hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x0c);
- hw_stats->rx_bytes += mtk_r32(mac->hw, base + 0x20);
- hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x24);
- hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x28);
- hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x2c);
- hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x30);
- hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x34);
- hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x38);
- hw_stats->rx_flow_control_packets +=
- mtk_r32(mac->hw, base + 0x3c);
- }
-
- u64_stats_update_end(&hw_stats->syncp);
-}
-
-static void mtk_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *storage)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_hw_stats *hw_stats = mac->hw_stats;
- unsigned int base = mtk_reg_table[MTK_REG_MTK_COUNTER_BASE];
- unsigned int start;
-
- if (!base) {
- netdev_stats_to_stats64(storage, &dev->stats);
- return;
- }
-
- if (netif_running(dev) && netif_device_present(dev)) {
- if (spin_trylock(&hw_stats->stats_lock)) {
- mtk_stats_update_mac(mac);
- spin_unlock(&hw_stats->stats_lock);
- }
- }
-
- do {
- start = u64_stats_fetch_begin_irq(&hw_stats->syncp);
- storage->rx_packets = hw_stats->rx_packets;
- storage->tx_packets = hw_stats->tx_packets;
- storage->rx_bytes = hw_stats->rx_bytes;
- storage->tx_bytes = hw_stats->tx_bytes;
- storage->collisions = hw_stats->tx_collisions;
- storage->rx_length_errors = hw_stats->rx_short_errors +
- hw_stats->rx_long_errors;
- storage->rx_over_errors = hw_stats->rx_overflow;
- storage->rx_crc_errors = hw_stats->rx_fcs_errors;
- storage->rx_errors = hw_stats->rx_checksum_errors;
- storage->tx_aborted_errors = hw_stats->tx_skip;
- } while (u64_stats_fetch_retry_irq(&hw_stats->syncp, start));
-
- storage->tx_errors = dev->stats.tx_errors;
- storage->rx_dropped = dev->stats.rx_dropped;
- storage->tx_dropped = dev->stats.tx_dropped;
-}
-
-static int mtk_vlan_rx_add_vid(struct net_device *dev,
- __be16 proto, u16 vid)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- u32 idx = (vid & 0xf);
- u32 vlan_cfg;
-
- if (!((mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE]) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_TX)))
- return 0;
-
- if (test_bit(idx, &eth->vlan_map)) {
- netdev_warn(dev, "disable tx vlan offload\n");
- dev->wanted_features &= ~NETIF_F_HW_VLAN_CTAG_TX;
- netdev_update_features(dev);
- } else {
- vlan_cfg = mtk_r32(eth,
- mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE] +
- ((idx >> 1) << 2));
- if (idx & 0x1) {
- vlan_cfg &= 0xffff;
- vlan_cfg |= (vid << 16);
- } else {
- vlan_cfg &= 0xffff0000;
- vlan_cfg |= vid;
- }
- mtk_w32(eth,
- vlan_cfg, mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE] +
- ((idx >> 1) << 2));
- set_bit(idx, &eth->vlan_map);
- }
-
- return 0;
-}
-
-static int mtk_vlan_rx_kill_vid(struct net_device *dev,
- __be16 proto, u16 vid)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- u32 idx = (vid & 0xf);
-
- if (!((mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE]) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_TX)))
- return 0;
-
- clear_bit(idx, &eth->vlan_map);
-
- return 0;
-}
-
-static inline u32 mtk_pdma_empty_txd(struct mtk_tx_ring *ring)
-{
- barrier();
- return (u32)(ring->tx_ring_size -
- ((ring->tx_next_idx - ring->tx_free_idx) &
- (ring->tx_ring_size - 1)));
-}
-
-static int mtk_skb_padto(struct sk_buff *skb, struct mtk_eth *eth)
-{
- unsigned int len;
- int ret;
-
- if (unlikely(skb->len >= VLAN_ETH_ZLEN))
- return 0;
-
- if (eth->soc->padding_64b && !eth->soc->padding_bug)
- return 0;
-
- if (skb_vlan_tag_present(skb))
- len = ETH_ZLEN;
- else if (skb->protocol == cpu_to_be16(ETH_P_8021Q))
- len = VLAN_ETH_ZLEN;
- else if (!eth->soc->padding_64b)
- len = ETH_ZLEN;
- else
- return 0;
-
- if (skb->len >= len)
- return 0;
-
- ret = skb_pad(skb, len - skb->len);
- if (ret < 0)
- return ret;
- skb->len = len;
- skb_set_tail_pointer(skb, len);
-
- return ret;
-}
-
-static int mtk_pdma_tx_map(struct sk_buff *skb, struct net_device *dev,
- int tx_num, struct mtk_tx_ring *ring, bool gso)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- struct skb_frag_struct *frag;
- struct mtk_tx_dma txd, *ptxd;
- struct mtk_tx_buf *tx_buf;
- int i, j, k, frag_size, frag_map_size, offset;
- dma_addr_t mapped_addr;
- unsigned int nr_frags;
- u32 def_txd4;
-
- if (mtk_skb_padto(skb, eth)) {
- netif_warn(eth, tx_err, dev, "tx padding failed!\n");
- return -1;
- }
-
- tx_buf = &ring->tx_buf[ring->tx_next_idx];
- memset(tx_buf, 0, sizeof(*tx_buf));
- memset(&txd, 0, sizeof(txd));
- nr_frags = skb_shinfo(skb)->nr_frags;
-
- /* init tx descriptor */
- def_txd4 = eth->soc->txd4;
- txd.txd4 = def_txd4;
-
- if (eth->soc->mac_count > 1)
- txd.txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
-
- if (gso)
- txd.txd4 |= TX_DMA_TSO;
-
- /* TX Checksum offload */
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- txd.txd4 |= TX_DMA_CHKSUM;
-
- /* VLAN header offload */
- if (skb_vlan_tag_present(skb)) {
- u16 tag = skb_vlan_tag_get(skb);
-
- txd.txd4 |= TX_DMA_INS_VLAN |
- ((tag >> VLAN_PRIO_SHIFT) << 4) |
- (tag & 0xF);
- }
-
- mapped_addr = dma_map_single(&dev->dev, skb->data,
- skb_headlen(skb), DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
- return -1;
-
- txd.txd1 = mapped_addr;
- txd.txd2 = TX_DMA_PLEN0(skb_headlen(skb));
-
- tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
- dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
-
- /* TX SG offload */
- j = ring->tx_next_idx;
- k = 0;
- for (i = 0; i < nr_frags; i++) {
- offset = 0;
- frag = &skb_shinfo(skb)->frags[i];
- frag_size = skb_frag_size(frag);
-
- while (frag_size > 0) {
- frag_map_size = min(frag_size, TX_DMA_BUF_LEN);
- mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
- frag_map_size,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
- goto err_dma;
-
- if (k & 0x1) {
- j = NEXT_TX_DESP_IDX(j);
- txd.txd1 = mapped_addr;
- txd.txd2 = TX_DMA_PLEN0(frag_map_size);
- txd.txd4 = def_txd4;
-
- tx_buf = &ring->tx_buf[j];
- memset(tx_buf, 0, sizeof(*tx_buf));
-
- tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
- dma_unmap_addr_set(tx_buf, dma_addr0,
- mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len0,
- frag_map_size);
- } else {
- txd.txd3 = mapped_addr;
- txd.txd2 |= TX_DMA_PLEN1(frag_map_size);
-
- tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC;
- tx_buf->flags |= MTK_TX_FLAGS_PAGE1;
- dma_unmap_addr_set(tx_buf, dma_addr1,
- mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len1,
- frag_map_size);
-
- if (!((i == (nr_frags - 1)) &&
- (frag_map_size == frag_size))) {
- mtk_set_txd_pdma(&txd,
- &ring->tx_dma[j]);
- memset(&txd, 0, sizeof(txd));
- }
- }
- frag_size -= frag_map_size;
- offset += frag_map_size;
- k++;
- }
- }
-
- /* set last segment */
- if (k & 0x1)
- txd.txd2 |= TX_DMA_LS1;
- else
- txd.txd2 |= TX_DMA_LS0;
- mtk_set_txd_pdma(&txd, &ring->tx_dma[j]);
-
- /* store skb to cleanup */
- tx_buf->skb = skb;
-
- netdev_sent_queue(dev, skb->len);
- skb_tx_timestamp(skb);
-
- ring->tx_next_idx = NEXT_TX_DESP_IDX(j);
- /* make sure that all changes to the dma ring are flushed before we
- * continue
- */
- wmb();
- atomic_set(&ring->tx_free_count, mtk_pdma_empty_txd(ring));
-
- if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
- mtk_reg_w32(eth, ring->tx_next_idx, MTK_REG_TX_CTX_IDX0);
-
- return 0;
-
-err_dma:
- j = ring->tx_next_idx;
- for (i = 0; i < tx_num; i++) {
- ptxd = &ring->tx_dma[j];
- tx_buf = &ring->tx_buf[j];
-
- /* unmap dma */
- mtk_txd_unmap(&dev->dev, tx_buf);
-
- ptxd->txd2 = TX_DMA_DESP2_DEF;
- j = NEXT_TX_DESP_IDX(j);
- }
- /* make sure that all changes to the dma ring are flushed before we
- * continue
- */
- wmb();
- return -1;
-}
-
-/* the qdma core needs scratch memory to be setup */
-static int mtk_init_fq_dma(struct mtk_eth *eth)
-{
- dma_addr_t dma_addr, phy_ring_head, phy_ring_tail;
- int cnt = eth->soc->dma_ring_size;
- int i;
-
- eth->scratch_ring = dma_alloc_coherent(eth->dev,
- cnt * sizeof(struct mtk_tx_dma),
- &phy_ring_head,
- GFP_ATOMIC | __GFP_ZERO);
- if (unlikely(!eth->scratch_ring))
- return -ENOMEM;
-
- eth->scratch_head = kcalloc(cnt, QDMA_PAGE_SIZE,
- GFP_KERNEL);
- dma_addr = dma_map_single(eth->dev,
- eth->scratch_head, cnt * QDMA_PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
- return -ENOMEM;
-
- memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
- phy_ring_tail = phy_ring_head + (sizeof(struct mtk_tx_dma) * (cnt - 1));
-
- for (i = 0; i < cnt; i++) {
- eth->scratch_ring[i].txd1 = (dma_addr + (i * QDMA_PAGE_SIZE));
- if (i < cnt - 1)
- eth->scratch_ring[i].txd2 = (phy_ring_head +
- ((i + 1) * sizeof(struct mtk_tx_dma)));
- eth->scratch_ring[i].txd3 = TX_QDMA_SDL(QDMA_PAGE_SIZE);
- }
-
- mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
- mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
- mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
- mtk_w32(eth, QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
-
- return 0;
-}
-
-static void *mtk_qdma_phys_to_virt(struct mtk_tx_ring *ring, u32 desc)
-{
- void *ret = ring->tx_dma;
-
- return ret + (desc - ring->tx_phys);
-}
-
-static struct mtk_tx_dma *mtk_tx_next_qdma(struct mtk_tx_ring *ring,
- struct mtk_tx_dma *txd)
-{
- return mtk_qdma_phys_to_virt(ring, txd->txd2);
-}
-
-static struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
- struct mtk_tx_dma *txd)
-{
- int idx = txd - ring->tx_dma;
-
- return &ring->tx_buf[idx];
-}
-
-static int mtk_qdma_tx_map(struct sk_buff *skb, struct net_device *dev,
- int tx_num, struct mtk_tx_ring *ring, bool gso)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- struct mtk_tx_dma *itxd, *txd;
- struct mtk_tx_buf *tx_buf;
- dma_addr_t mapped_addr;
- unsigned int nr_frags;
- int i, n_desc = 1;
- u32 txd4 = eth->soc->txd4;
-
- itxd = ring->tx_next_free;
- if (itxd == ring->tx_last_free)
- return -ENOMEM;
-
- if (eth->soc->mac_count > 1)
- txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
-
- tx_buf = mtk_desc_to_tx_buf(ring, itxd);
- memset(tx_buf, 0, sizeof(*tx_buf));
-
- if (gso)
- txd4 |= TX_DMA_TSO;
-
- /* TX Checksum offload */
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- txd4 |= TX_DMA_CHKSUM;
-
- /* VLAN header offload */
- if (skb_vlan_tag_present(skb))
- txd4 |= TX_DMA_INS_VLAN_MT7621 | skb_vlan_tag_get(skb);
-
- mapped_addr = dma_map_single(&dev->dev, skb->data,
- skb_headlen(skb), DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
- return -ENOMEM;
-
- WRITE_ONCE(itxd->txd1, mapped_addr);
- tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
- dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
-
- /* TX SG offload */
- txd = itxd;
- nr_frags = skb_shinfo(skb)->nr_frags;
- for (i = 0; i < nr_frags; i++) {
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
- unsigned int offset = 0;
- int frag_size = skb_frag_size(frag);
-
- while (frag_size) {
- bool last_frag = false;
- unsigned int frag_map_size;
-
- txd = mtk_tx_next_qdma(ring, txd);
- if (txd == ring->tx_last_free)
- goto err_dma;
-
- n_desc++;
- frag_map_size = min(frag_size, TX_DMA_BUF_LEN);
- mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
- frag_map_size,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
- goto err_dma;
-
- if (i == nr_frags - 1 &&
- (frag_size - frag_map_size) == 0)
- last_frag = true;
-
- WRITE_ONCE(txd->txd1, mapped_addr);
- WRITE_ONCE(txd->txd3, (QDMA_TX_SWC |
- TX_DMA_PLEN0(frag_map_size) |
- last_frag * TX_DMA_LS0) |
- mac->id);
- WRITE_ONCE(txd->txd4, 0);
-
- tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC;
- tx_buf = mtk_desc_to_tx_buf(ring, txd);
- memset(tx_buf, 0, sizeof(*tx_buf));
-
- tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
- dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
- frag_size -= frag_map_size;
- offset += frag_map_size;
- }
- }
-
- /* store skb to cleanup */
- tx_buf->skb = skb;
-
- WRITE_ONCE(itxd->txd4, txd4);
- WRITE_ONCE(itxd->txd3, (QDMA_TX_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
- (!nr_frags * TX_DMA_LS0)));
-
- netdev_sent_queue(dev, skb->len);
- skb_tx_timestamp(skb);
-
- ring->tx_next_free = mtk_tx_next_qdma(ring, txd);
- atomic_sub(n_desc, &ring->tx_free_count);
-
- /* make sure that all changes to the dma ring are flushed before we
- * continue
- */
- wmb();
-
- if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
- mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
-
- return 0;
-
-err_dma:
- do {
- tx_buf = mtk_desc_to_tx_buf(ring, txd);
-
- /* unmap dma */
- mtk_txd_unmap(&dev->dev, tx_buf);
-
- itxd->txd3 = TX_DMA_DESP2_DEF;
- itxd = mtk_tx_next_qdma(ring, itxd);
- } while (itxd != txd);
-
- return -ENOMEM;
-}
-
-static inline int mtk_cal_txd_req(struct sk_buff *skb)
-{
- int i, nfrags;
- struct skb_frag_struct *frag;
-
- nfrags = 1;
- if (skb_is_gso(skb)) {
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- frag = &skb_shinfo(skb)->frags[i];
- nfrags += DIV_ROUND_UP(frag->size, TX_DMA_BUF_LEN);
- }
- } else {
- nfrags += skb_shinfo(skb)->nr_frags;
- }
-
- return DIV_ROUND_UP(nfrags, 2);
-}
-
-static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- struct mtk_tx_ring *ring = &eth->tx_ring;
- struct net_device_stats *stats = &dev->stats;
- int tx_num;
- int len = skb->len;
- bool gso = false;
-
- tx_num = mtk_cal_txd_req(skb);
- if (unlikely(atomic_read(&ring->tx_free_count) <= tx_num)) {
- netif_stop_queue(dev);
- netif_err(eth, tx_queued, dev,
- "Tx Ring full when queue awake!\n");
- return NETDEV_TX_BUSY;
- }
-
- /* TSO: fill MSS info in tcp checksum field */
- if (skb_is_gso(skb)) {
- if (skb_cow_head(skb, 0)) {
- netif_warn(eth, tx_err, dev,
- "GSO expand head fail.\n");
- goto drop;
- }
-
- if (skb_shinfo(skb)->gso_type &
- (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
- gso = true;
- tcp_hdr(skb)->check = htons(skb_shinfo(skb)->gso_size);
- }
- }
-
- if (ring->tx_map(skb, dev, tx_num, ring, gso) < 0)
- goto drop;
-
- stats->tx_packets++;
- stats->tx_bytes += len;
-
- if (unlikely(atomic_read(&ring->tx_free_count) <= ring->tx_thresh)) {
- netif_stop_queue(dev);
- smp_mb();
- if (unlikely(atomic_read(&ring->tx_free_count) >
- ring->tx_thresh))
- netif_wake_queue(dev);
- }
-
- return NETDEV_TX_OK;
-
-drop:
- stats->tx_dropped++;
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
-}
-
-static int mtk_poll_rx(struct napi_struct *napi, int budget,
- struct mtk_eth *eth, u32 rx_intr)
-{
- struct mtk_soc_data *soc = eth->soc;
- struct mtk_rx_ring *ring = &eth->rx_ring[0];
- int idx = ring->rx_calc_idx;
- u32 checksum_bit;
- struct sk_buff *skb;
- u8 *data, *new_data;
- struct mtk_rx_dma *rxd, trxd;
- int done = 0, pad;
-
- if (eth->soc->hw_features & NETIF_F_RXCSUM)
- checksum_bit = soc->checksum_bit;
- else
- checksum_bit = 0;
-
- if (eth->soc->rx_2b_offset)
- pad = 0;
- else
- pad = NET_IP_ALIGN;
-
- while (done < budget) {
- struct net_device *netdev;
- unsigned int pktlen;
- dma_addr_t dma_addr;
- int mac = 0;
-
- idx = NEXT_RX_DESP_IDX(idx);
- rxd = &ring->rx_dma[idx];
- data = ring->rx_data[idx];
-
- mtk_get_rxd(&trxd, rxd);
- if (!(trxd.rxd2 & RX_DMA_DONE))
- break;
-
- /* find out which mac the packet come from. values start at 1 */
- if (eth->soc->mac_count > 1) {
- mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
- RX_DMA_FPORT_MASK;
- mac--;
- if (mac < 0 || mac >= eth->soc->mac_count)
- goto release_desc;
- }
-
- netdev = eth->netdev[mac];
-
- /* alloc new buffer */
- new_data = napi_alloc_frag(ring->frag_size);
- if (unlikely(!new_data || !netdev)) {
- netdev->stats.rx_dropped++;
- goto release_desc;
- }
- dma_addr = dma_map_single(&netdev->dev,
- new_data + NET_SKB_PAD + pad,
- ring->rx_buf_size,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
- skb_free_frag(new_data);
- goto release_desc;
- }
-
- /* receive data */
- skb = build_skb(data, ring->frag_size);
- if (unlikely(!skb)) {
- put_page(virt_to_head_page(new_data));
- goto release_desc;
- }
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-
- dma_unmap_single(&netdev->dev, trxd.rxd1,
- ring->rx_buf_size, DMA_FROM_DEVICE);
- pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
- skb->dev = netdev;
- skb_put(skb, pktlen);
- if (trxd.rxd4 & checksum_bit)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- else
- skb_checksum_none_assert(skb);
- skb->protocol = eth_type_trans(skb, netdev);
-
- netdev->stats.rx_packets++;
- netdev->stats.rx_bytes += pktlen;
-
- if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
- RX_DMA_VID(trxd.rxd3))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
- RX_DMA_VID(trxd.rxd3));
- napi_gro_receive(napi, skb);
-
- ring->rx_data[idx] = new_data;
- rxd->rxd1 = (unsigned int)dma_addr;
-
-release_desc:
- if (eth->soc->rx_sg_dma)
- rxd->rxd2 = RX_DMA_PLEN0(ring->rx_buf_size);
- else
- rxd->rxd2 = RX_DMA_LSO;
-
- ring->rx_calc_idx = idx;
- /* make sure that all changes to the dma ring are flushed before
- * we continue
- */
- wmb();
- if (eth->soc->dma_type == MTK_QDMA)
- mtk_w32(eth, ring->rx_calc_idx, MTK_QRX_CRX_IDX0);
- else
- mtk_reg_w32(eth, ring->rx_calc_idx,
- MTK_REG_RX_CALC_IDX0);
- done++;
- }
-
- if (done < budget)
- mtk_irq_ack(eth, rx_intr);
-
- return done;
-}
-
-static int mtk_pdma_tx_poll(struct mtk_eth *eth, int budget, bool *tx_again)
-{
- struct sk_buff *skb;
- struct mtk_tx_buf *tx_buf;
- int done = 0;
- u32 idx, hwidx;
- struct mtk_tx_ring *ring = &eth->tx_ring;
- unsigned int bytes = 0;
-
- idx = ring->tx_free_idx;
- hwidx = mtk_reg_r32(eth, MTK_REG_TX_DTX_IDX0);
-
- while ((idx != hwidx) && budget) {
- tx_buf = &ring->tx_buf[idx];
- skb = tx_buf->skb;
-
- if (!skb)
- break;
-
- if (skb != (struct sk_buff *)DMA_DUMMY_DESC) {
- bytes += skb->len;
- done++;
- budget--;
- }
- mtk_txd_unmap(eth->dev, tx_buf);
- idx = NEXT_TX_DESP_IDX(idx);
- }
- ring->tx_free_idx = idx;
- atomic_set(&ring->tx_free_count, mtk_pdma_empty_txd(ring));
-
- /* read hw index again make sure no new tx packet */
- if (idx != hwidx || idx != mtk_reg_r32(eth, MTK_REG_TX_DTX_IDX0))
- *tx_again = 1;
-
- if (done)
- netdev_completed_queue(*eth->netdev, done, bytes);
-
- return done;
-}
-
-static int mtk_qdma_tx_poll(struct mtk_eth *eth, int budget, bool *tx_again)
-{
- struct mtk_tx_ring *ring = &eth->tx_ring;
- struct mtk_tx_dma *desc;
- struct sk_buff *skb;
- struct mtk_tx_buf *tx_buf;
- int total = 0, done[MTK_MAX_DEVS];
- unsigned int bytes[MTK_MAX_DEVS];
- u32 cpu, dma;
- int i;
-
- memset(done, 0, sizeof(done));
- memset(bytes, 0, sizeof(bytes));
-
- cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
- dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
-
- desc = mtk_qdma_phys_to_virt(ring, cpu);
-
- while ((cpu != dma) && budget) {
- u32 next_cpu = desc->txd2;
- int mac;
-
- desc = mtk_tx_next_qdma(ring, desc);
- if ((desc->txd3 & QDMA_TX_OWNER_CPU) == 0)
- break;
-
- mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
- TX_DMA_FPORT_MASK;
- mac--;
-
- tx_buf = mtk_desc_to_tx_buf(ring, desc);
- skb = tx_buf->skb;
- if (!skb)
- break;
-
- if (skb != (struct sk_buff *)DMA_DUMMY_DESC) {
- bytes[mac] += skb->len;
- done[mac]++;
- budget--;
- }
- mtk_txd_unmap(eth->dev, tx_buf);
-
- ring->tx_last_free->txd2 = next_cpu;
- ring->tx_last_free = desc;
- atomic_inc(&ring->tx_free_count);
-
- cpu = next_cpu;
- }
-
- mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
-
- /* read hw index again make sure no new tx packet */
- if (cpu != dma || cpu != mtk_r32(eth, MTK_QTX_DRX_PTR))
- *tx_again = true;
-
- for (i = 0; i < eth->soc->mac_count; i++) {
- if (!done[i])
- continue;
- netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
- total += done[i];
- }
-
- return total;
-}
-
-static int mtk_poll_tx(struct mtk_eth *eth, int budget, u32 tx_intr,
- bool *tx_again)
-{
- struct mtk_tx_ring *ring = &eth->tx_ring;
- struct net_device *netdev = eth->netdev[0];
- int done;
-
- done = eth->tx_ring.tx_poll(eth, budget, tx_again);
- if (!*tx_again)
- mtk_irq_ack(eth, tx_intr);
-
- if (!done)
- return 0;
-
- smp_mb();
- if (unlikely(!netif_queue_stopped(netdev)))
- return done;
-
- if (atomic_read(&ring->tx_free_count) > ring->tx_thresh)
- netif_wake_queue(netdev);
-
- return done;
-}
-
-static void mtk_stats_update(struct mtk_eth *eth)
-{
- int i;
-
- for (i = 0; i < eth->soc->mac_count; i++) {
- if (!eth->mac[i] || !eth->mac[i]->hw_stats)
- continue;
- if (spin_trylock(&eth->mac[i]->hw_stats->stats_lock)) {
- mtk_stats_update_mac(eth->mac[i]);
- spin_unlock(&eth->mac[i]->hw_stats->stats_lock);
- }
- }
-}
-
-static int mtk_poll(struct napi_struct *napi, int budget)
-{
- struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
- u32 status, mtk_status, mask, tx_intr, rx_intr, status_intr;
- int tx_done, rx_done;
- bool tx_again = false;
-
- status = mtk_irq_pending(eth);
- mtk_status = mtk_irq_pending_status(eth);
- tx_intr = eth->soc->tx_int;
- rx_intr = eth->soc->rx_int;
- status_intr = eth->soc->status_int;
- tx_done = 0;
- rx_done = 0;
- tx_again = 0;
-
- if (status & tx_intr)
- tx_done = mtk_poll_tx(eth, budget, tx_intr, &tx_again);
-
- if (status & rx_intr)
- rx_done = mtk_poll_rx(napi, budget, eth, rx_intr);
-
- if (unlikely(mtk_status & status_intr)) {
- mtk_stats_update(eth);
- mtk_irq_ack_status(eth, status_intr);
- }
-
- if (unlikely(netif_msg_intr(eth))) {
- mask = mtk_irq_enabled(eth);
- netdev_info(eth->netdev[0],
- "done tx %d, rx %d, intr 0x%08x/0x%x\n",
- tx_done, rx_done, status, mask);
- }
-
- if (tx_again || rx_done == budget)
- return budget;
-
- status = mtk_irq_pending(eth);
- if (status & (tx_intr | rx_intr))
- return budget;
-
- napi_complete(napi);
- mtk_irq_enable(eth, tx_intr | rx_intr);
-
- return rx_done;
-}
-
-static int mtk_pdma_tx_alloc(struct mtk_eth *eth)
-{
- int i;
- struct mtk_tx_ring *ring = &eth->tx_ring;
-
- ring->tx_ring_size = eth->soc->dma_ring_size;
- ring->tx_free_idx = 0;
- ring->tx_next_idx = 0;
- ring->tx_thresh = max((unsigned long)ring->tx_ring_size >> 2,
- MAX_SKB_FRAGS);
-
- ring->tx_buf = kcalloc(ring->tx_ring_size, sizeof(*ring->tx_buf),
- GFP_KERNEL);
- if (!ring->tx_buf)
- goto no_tx_mem;
-
- ring->tx_dma =
- dma_alloc_coherent(eth->dev,
- ring->tx_ring_size * sizeof(*ring->tx_dma),
- &ring->tx_phys, GFP_ATOMIC | __GFP_ZERO);
- if (!ring->tx_dma)
- goto no_tx_mem;
-
- for (i = 0; i < ring->tx_ring_size; i++) {
- ring->tx_dma[i].txd2 = TX_DMA_DESP2_DEF;
- ring->tx_dma[i].txd4 = eth->soc->txd4;
- }
-
- atomic_set(&ring->tx_free_count, mtk_pdma_empty_txd(ring));
- ring->tx_map = mtk_pdma_tx_map;
- ring->tx_poll = mtk_pdma_tx_poll;
- ring->tx_clean = mtk_pdma_tx_clean;
-
- /* make sure that all changes to the dma ring are flushed before we
- * continue
- */
- wmb();
-
- mtk_reg_w32(eth, ring->tx_phys, MTK_REG_TX_BASE_PTR0);
- mtk_reg_w32(eth, ring->tx_ring_size, MTK_REG_TX_MAX_CNT0);
- mtk_reg_w32(eth, 0, MTK_REG_TX_CTX_IDX0);
- mtk_reg_w32(eth, MTK_PST_DTX_IDX0, MTK_REG_PDMA_RST_CFG);
-
- return 0;
-
-no_tx_mem:
- return -ENOMEM;
-}
-
-static int mtk_qdma_tx_alloc_tx(struct mtk_eth *eth)
-{
- struct mtk_tx_ring *ring = &eth->tx_ring;
- int i, sz = sizeof(*ring->tx_dma);
-
- ring->tx_ring_size = eth->soc->dma_ring_size;
- ring->tx_buf = kcalloc(ring->tx_ring_size, sizeof(*ring->tx_buf),
- GFP_KERNEL);
- if (!ring->tx_buf)
- goto no_tx_mem;
-
- ring->tx_dma = dma_alloc_coherent(eth->dev, ring->tx_ring_size * sz,
- &ring->tx_phys,
- GFP_ATOMIC | __GFP_ZERO);
- if (!ring->tx_dma)
- goto no_tx_mem;
-
- for (i = 0; i < ring->tx_ring_size; i++) {
- int next = (i + 1) % ring->tx_ring_size;
- u32 next_ptr = ring->tx_phys + next * sz;
-
- ring->tx_dma[i].txd2 = next_ptr;
- ring->tx_dma[i].txd3 = TX_DMA_DESP2_DEF;
- }
-
- atomic_set(&ring->tx_free_count, ring->tx_ring_size - 2);
- ring->tx_next_free = &ring->tx_dma[0];
- ring->tx_last_free = &ring->tx_dma[ring->tx_ring_size - 2];
- ring->tx_thresh = max((unsigned long)ring->tx_ring_size >> 2,
- MAX_SKB_FRAGS);
-
- ring->tx_map = mtk_qdma_tx_map;
- ring->tx_poll = mtk_qdma_tx_poll;
- ring->tx_clean = mtk_qdma_tx_clean;
-
- /* make sure that all changes to the dma ring are flushed before we
- * continue
- */
- wmb();
-
- mtk_w32(eth, ring->tx_phys, MTK_QTX_CTX_PTR);
- mtk_w32(eth, ring->tx_phys, MTK_QTX_DTX_PTR);
- mtk_w32(eth,
- ring->tx_phys + ((ring->tx_ring_size - 1) * sz),
- MTK_QTX_CRX_PTR);
- mtk_w32(eth,
- ring->tx_phys + ((ring->tx_ring_size - 1) * sz),
- MTK_QTX_DRX_PTR);
-
- return 0;
-
-no_tx_mem:
- return -ENOMEM;
-}
-
-static int mtk_qdma_init(struct mtk_eth *eth, int ring)
-{
- int err;
-
- err = mtk_init_fq_dma(eth);
- if (err)
- return err;
-
- err = mtk_qdma_tx_alloc_tx(eth);
- if (err)
- return err;
-
- err = mtk_dma_rx_alloc(eth, &eth->rx_ring[ring]);
- if (err)
- return err;
-
- mtk_w32(eth, eth->rx_ring[ring].rx_phys, MTK_QRX_BASE_PTR0);
- mtk_w32(eth, eth->rx_ring[ring].rx_ring_size, MTK_QRX_MAX_CNT0);
- mtk_w32(eth, eth->rx_ring[ring].rx_calc_idx, MTK_QRX_CRX_IDX0);
- mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
- mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
-
- /* Enable random early drop and set drop threshold automatically */
- mtk_w32(eth, 0x174444, MTK_QDMA_FC_THRES);
- mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
-
- return 0;
-}
-
-static int mtk_pdma_qdma_init(struct mtk_eth *eth)
-{
- int err = mtk_qdma_init(eth, 1);
-
- if (err)
- return err;
-
- err = mtk_dma_rx_alloc(eth, &eth->rx_ring[0]);
- if (err)
- return err;
-
- mtk_reg_w32(eth, eth->rx_ring[0].rx_phys, MTK_REG_RX_BASE_PTR0);
- mtk_reg_w32(eth, eth->rx_ring[0].rx_ring_size, MTK_REG_RX_MAX_CNT0);
- mtk_reg_w32(eth, eth->rx_ring[0].rx_calc_idx, MTK_REG_RX_CALC_IDX0);
- mtk_reg_w32(eth, MTK_PST_DRX_IDX0, MTK_REG_PDMA_RST_CFG);
-
- return 0;
-}
-
-static int mtk_pdma_init(struct mtk_eth *eth)
-{
- struct mtk_rx_ring *ring = &eth->rx_ring[0];
- int err;
-
- err = mtk_pdma_tx_alloc(eth);
- if (err)
- return err;
-
- err = mtk_dma_rx_alloc(eth, ring);
- if (err)
- return err;
-
- mtk_reg_w32(eth, ring->rx_phys, MTK_REG_RX_BASE_PTR0);
- mtk_reg_w32(eth, ring->rx_ring_size, MTK_REG_RX_MAX_CNT0);
- mtk_reg_w32(eth, ring->rx_calc_idx, MTK_REG_RX_CALC_IDX0);
- mtk_reg_w32(eth, MTK_PST_DRX_IDX0, MTK_REG_PDMA_RST_CFG);
-
- return 0;
-}
-
-static void mtk_dma_free(struct mtk_eth *eth)
-{
- int i;
-
- for (i = 0; i < eth->soc->mac_count; i++)
- if (eth->netdev[i])
- netdev_reset_queue(eth->netdev[i]);
- eth->tx_ring.tx_clean(eth);
- mtk_clean_rx(eth, &eth->rx_ring[0]);
- mtk_clean_rx(eth, &eth->rx_ring[1]);
- kfree(eth->scratch_head);
-}
-
-static void mtk_tx_timeout(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- struct mtk_tx_ring *ring = &eth->tx_ring;
-
- eth->netdev[mac->id]->stats.tx_errors++;
- netif_err(eth, tx_err, dev,
- "transmit timed out\n");
- if (eth->soc->dma_type & MTK_PDMA) {
- netif_info(eth, drv, dev, "pdma_cfg:%08x\n",
- mtk_reg_r32(eth, MTK_REG_PDMA_GLO_CFG));
- netif_info(eth, drv, dev,
- "tx_ring=%d, base=%08x, max=%u, ctx=%u, dtx=%u, fdx=%hu, next=%hu\n",
- 0, mtk_reg_r32(eth, MTK_REG_TX_BASE_PTR0),
- mtk_reg_r32(eth, MTK_REG_TX_MAX_CNT0),
- mtk_reg_r32(eth, MTK_REG_TX_CTX_IDX0),
- mtk_reg_r32(eth, MTK_REG_TX_DTX_IDX0),
- ring->tx_free_idx,
- ring->tx_next_idx);
- }
- if (eth->soc->dma_type & MTK_QDMA) {
- netif_info(eth, drv, dev, "qdma_cfg:%08x\n",
- mtk_r32(eth, MTK_QDMA_GLO_CFG));
- netif_info(eth, drv, dev,
- "tx_ring=%d, ctx=%08x, dtx=%08x, crx=%08x, drx=%08x, free=%hu\n",
- 0, mtk_r32(eth, MTK_QTX_CTX_PTR),
- mtk_r32(eth, MTK_QTX_DTX_PTR),
- mtk_r32(eth, MTK_QTX_CRX_PTR),
- mtk_r32(eth, MTK_QTX_DRX_PTR),
- atomic_read(&ring->tx_free_count));
- }
- netif_info(eth, drv, dev,
- "rx_ring=%d, base=%08x, max=%u, calc=%u, drx=%u\n",
- 0, mtk_reg_r32(eth, MTK_REG_RX_BASE_PTR0),
- mtk_reg_r32(eth, MTK_REG_RX_MAX_CNT0),
- mtk_reg_r32(eth, MTK_REG_RX_CALC_IDX0),
- mtk_reg_r32(eth, MTK_REG_RX_DRX_IDX0));
-
- schedule_work(&mac->pending_work);
-}
-
-static irqreturn_t mtk_handle_irq(int irq, void *_eth)
-{
- struct mtk_eth *eth = _eth;
- u32 status, int_mask;
-
- status = mtk_irq_pending(eth);
- if (unlikely(!status))
- return IRQ_NONE;
-
- int_mask = (eth->soc->rx_int | eth->soc->tx_int);
- if (likely(status & int_mask)) {
- if (likely(napi_schedule_prep(&eth->rx_napi)))
- __napi_schedule(&eth->rx_napi);
- } else {
- mtk_irq_ack(eth, status);
- }
- mtk_irq_disable(eth, int_mask);
-
- return IRQ_HANDLED;
-}
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void mtk_poll_controller(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- u32 int_mask = eth->soc->tx_int | eth->soc->rx_int;
-
- mtk_irq_disable(eth, int_mask);
- mtk_handle_irq(dev->irq, dev);
- mtk_irq_enable(eth, int_mask);
-}
-#endif
-
-int mtk_set_clock_cycle(struct mtk_eth *eth)
-{
- unsigned long sysclk = eth->sysclk;
-
- sysclk /= MTK_US_CYC_CNT_DIVISOR;
- sysclk <<= MTK_US_CYC_CNT_SHIFT;
-
- mtk_w32(eth, (mtk_r32(eth, MTK_GLO_CFG) &
- ~(MTK_US_CYC_CNT_MASK << MTK_US_CYC_CNT_SHIFT)) |
- sysclk,
- MTK_GLO_CFG);
- return 0;
-}
-
-void mtk_fwd_config(struct mtk_eth *eth)
-{
- u32 fwd_cfg;
-
- fwd_cfg = mtk_r32(eth, MTK_GDMA1_FWD_CFG);
-
- /* disable jumbo frame */
- if (eth->soc->jumbo_frame)
- fwd_cfg &= ~MTK_GDM1_JMB_EN;
-
- /* set unicast/multicast/broadcast frame to cpu */
- fwd_cfg &= ~0xffff;
-
- mtk_w32(eth, fwd_cfg, MTK_GDMA1_FWD_CFG);
-}
-
-void mtk_csum_config(struct mtk_eth *eth)
-{
- if (eth->soc->hw_features & NETIF_F_RXCSUM)
- mtk_w32(eth, mtk_r32(eth, MTK_GDMA1_FWD_CFG) |
- (MTK_GDM1_ICS_EN | MTK_GDM1_TCS_EN | MTK_GDM1_UCS_EN),
- MTK_GDMA1_FWD_CFG);
- else
- mtk_w32(eth, mtk_r32(eth, MTK_GDMA1_FWD_CFG) &
- ~(MTK_GDM1_ICS_EN | MTK_GDM1_TCS_EN | MTK_GDM1_UCS_EN),
- MTK_GDMA1_FWD_CFG);
- if (eth->soc->hw_features & NETIF_F_IP_CSUM)
- mtk_w32(eth, mtk_r32(eth, MTK_CDMA_CSG_CFG) |
- (MTK_ICS_GEN_EN | MTK_TCS_GEN_EN | MTK_UCS_GEN_EN),
- MTK_CDMA_CSG_CFG);
- else
- mtk_w32(eth, mtk_r32(eth, MTK_CDMA_CSG_CFG) &
- ~(MTK_ICS_GEN_EN | MTK_TCS_GEN_EN | MTK_UCS_GEN_EN),
- MTK_CDMA_CSG_CFG);
-}
-
-static int mtk_start_dma(struct mtk_eth *eth)
-{
- unsigned long flags;
- u32 val;
- int err;
-
- if (eth->soc->dma_type == MTK_PDMA)
- err = mtk_pdma_init(eth);
- else if (eth->soc->dma_type == MTK_QDMA)
- err = mtk_qdma_init(eth, 0);
- else
- err = mtk_pdma_qdma_init(eth);
- if (err) {
- mtk_dma_free(eth);
- return err;
- }
-
- spin_lock_irqsave(&eth->page_lock, flags);
-
- val = MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN;
- if (eth->soc->rx_2b_offset)
- val |= MTK_RX_2B_OFFSET;
- val |= eth->soc->pdma_glo_cfg;
-
- if (eth->soc->dma_type & MTK_PDMA)
- mtk_reg_w32(eth, val, MTK_REG_PDMA_GLO_CFG);
-
- if (eth->soc->dma_type & MTK_QDMA)
- mtk_w32(eth, val, MTK_QDMA_GLO_CFG);
-
- spin_unlock_irqrestore(&eth->page_lock, flags);
-
- return 0;
-}
-
-static int mtk_open(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
-
- dma_coerce_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32));
-
- if (!atomic_read(&eth->dma_refcnt)) {
- int err = mtk_start_dma(eth);
-
- if (err)
- return err;
-
- napi_enable(&eth->rx_napi);
- mtk_irq_enable(eth, eth->soc->tx_int | eth->soc->rx_int);
- }
- atomic_inc(&eth->dma_refcnt);
-
- if (eth->phy)
- eth->phy->start(mac);
-
- if (eth->soc->has_carrier && eth->soc->has_carrier(eth))
- netif_carrier_on(dev);
-
- netif_start_queue(dev);
- eth->soc->fwd_config(eth);
-
- return 0;
-}
-
-static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
-{
- unsigned long flags;
- u32 val;
- int i;
-
- /* stop the dma enfine */
- spin_lock_irqsave(&eth->page_lock, flags);
- val = mtk_r32(eth, glo_cfg);
- mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
- glo_cfg);
- spin_unlock_irqrestore(&eth->page_lock, flags);
-
- /* wait for dma stop */
- for (i = 0; i < 10; i++) {
- val = mtk_r32(eth, glo_cfg);
- if (val & (MTK_TX_DMA_BUSY | MTK_RX_DMA_BUSY)) {
- msleep(20);
- continue;
- }
- break;
- }
-}
-
-static int mtk_stop(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
-
- netif_tx_disable(dev);
- if (eth->phy)
- eth->phy->stop(mac);
-
- if (!atomic_dec_and_test(&eth->dma_refcnt))
- return 0;
-
- mtk_irq_disable(eth, eth->soc->tx_int | eth->soc->rx_int);
- napi_disable(&eth->rx_napi);
-
- if (eth->soc->dma_type & MTK_PDMA)
- mtk_stop_dma(eth, mtk_reg_table[MTK_REG_PDMA_GLO_CFG]);
-
- if (eth->soc->dma_type & MTK_QDMA)
- mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
-
- mtk_dma_free(eth);
-
- return 0;
-}
-
-static int __init mtk_init_hw(struct mtk_eth *eth)
-{
- int i, err;
-
- eth->soc->reset_fe(eth);
-
- if (eth->soc->switch_init)
- if (eth->soc->switch_init(eth)) {
- dev_err(eth->dev, "failed to initialize switch core\n");
- return -ENODEV;
- }
-
- err = devm_request_irq(eth->dev, eth->irq, mtk_handle_irq, 0,
- dev_name(eth->dev), eth);
- if (err)
- return err;
-
- err = mtk_mdio_init(eth);
- if (err)
- return err;
-
- /* disable delay and normal interrupt */
- mtk_reg_w32(eth, 0, MTK_REG_DLY_INT_CFG);
- if (eth->soc->dma_type & MTK_QDMA)
- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
- mtk_irq_disable(eth, eth->soc->tx_int | eth->soc->rx_int);
-
- /* frame engine will push VLAN tag regarding to VIDX field in Tx desc */
- if (mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE])
- for (i = 0; i < 16; i += 2)
- mtk_w32(eth, ((i + 1) << 16) + i,
- mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE] +
- (i * 2));
-
- if (eth->soc->fwd_config(eth))
- dev_err(eth->dev, "unable to get clock\n");
-
- if (mtk_reg_table[MTK_REG_MTK_RST_GL]) {
- mtk_reg_w32(eth, 1, MTK_REG_MTK_RST_GL);
- mtk_reg_w32(eth, 0, MTK_REG_MTK_RST_GL);
- }
-
- return 0;
-}
-
-static int __init mtk_init(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- struct device_node *port;
- const char *mac_addr;
- int err;
-
- mac_addr = of_get_mac_address(mac->of_node);
- if (mac_addr)
- ether_addr_copy(dev->dev_addr, mac_addr);
-
- /* If the mac address is invalid, use random mac address */
- if (!is_valid_ether_addr(dev->dev_addr)) {
- eth_hw_addr_random(dev);
- dev_err(eth->dev, "generated random MAC address %pM\n",
- dev->dev_addr);
- }
- mac->hw->soc->set_mac(mac, dev->dev_addr);
-
- if (eth->soc->port_init)
- for_each_child_of_node(mac->of_node, port)
- if (of_device_is_compatible(port,
- "mediatek,eth-port") &&
- of_device_is_available(port))
- eth->soc->port_init(eth, mac, port);
-
- if (eth->phy) {
- err = eth->phy->connect(mac);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static void mtk_uninit(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
-
- if (eth->phy)
- eth->phy->disconnect(mac);
- mtk_mdio_cleanup(eth);
-
- mtk_irq_disable(eth, ~0);
- free_irq(dev->irq, dev);
-}
-
-static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
- struct mtk_mac *mac = netdev_priv(dev);
-
- if (!mac->phy_dev)
- return -ENODEV;
-
- switch (cmd) {
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- case SIOCSMIIREG:
- return phy_mii_ioctl(mac->phy_dev, ifr, cmd);
- default:
- break;
- }
-
- return -EOPNOTSUPP;
-}
-
-static int mtk_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- int frag_size, old_mtu;
- u32 fwd_cfg;
-
- if (!eth->soc->jumbo_frame)
- return eth_change_mtu(dev, new_mtu);
-
- frag_size = mtk_max_frag_size(new_mtu);
- if (new_mtu < 68 || frag_size > PAGE_SIZE)
- return -EINVAL;
-
- old_mtu = dev->mtu;
- dev->mtu = new_mtu;
-
- /* return early if the buffer sizes will not change */
- if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN)
- return 0;
- if (old_mtu > ETH_DATA_LEN && new_mtu > ETH_DATA_LEN)
- return 0;
-
- if (new_mtu <= ETH_DATA_LEN)
- eth->rx_ring[0].frag_size = mtk_max_frag_size(ETH_DATA_LEN);
- else
- eth->rx_ring[0].frag_size = PAGE_SIZE;
- eth->rx_ring[0].rx_buf_size =
- mtk_max_buf_size(eth->rx_ring[0].frag_size);
-
- if (!netif_running(dev))
- return 0;
-
- mtk_stop(dev);
- fwd_cfg = mtk_r32(eth, MTK_GDMA1_FWD_CFG);
- if (new_mtu <= ETH_DATA_LEN) {
- fwd_cfg &= ~MTK_GDM1_JMB_EN;
- } else {
- fwd_cfg &= ~(MTK_GDM1_JMB_LEN_MASK << MTK_GDM1_JMB_LEN_SHIFT);
- fwd_cfg |= (DIV_ROUND_UP(frag_size, 1024) <<
- MTK_GDM1_JMB_LEN_SHIFT) | MTK_GDM1_JMB_EN;
- }
- mtk_w32(eth, fwd_cfg, MTK_GDMA1_FWD_CFG);
-
- return mtk_open(dev);
-}
-
-static void mtk_pending_work(struct work_struct *work)
-{
- struct mtk_mac *mac = container_of(work, struct mtk_mac, pending_work);
- struct mtk_eth *eth = mac->hw;
- struct net_device *dev = eth->netdev[mac->id];
- int err;
-
- rtnl_lock();
- mtk_stop(dev);
-
- err = mtk_open(dev);
- if (err) {
- netif_alert(eth, ifup, dev,
- "Driver up/down cycle failed, closing device.\n");
- dev_close(dev);
- }
- rtnl_unlock();
-}
-
-static int mtk_cleanup(struct mtk_eth *eth)
-{
- int i;
-
- for (i = 0; i < eth->soc->mac_count; i++) {
- struct mtk_mac *mac = netdev_priv(eth->netdev[i]);
-
- if (!eth->netdev[i])
- continue;
-
- unregister_netdev(eth->netdev[i]);
- free_netdev(eth->netdev[i]);
- cancel_work_sync(&mac->pending_work);
- }
-
- return 0;
-}
-
-static const struct net_device_ops mtk_netdev_ops = {
- .ndo_init = mtk_init,
- .ndo_uninit = mtk_uninit,
- .ndo_open = mtk_open,
- .ndo_stop = mtk_stop,
- .ndo_start_xmit = mtk_start_xmit,
- .ndo_set_mac_address = mtk_set_mac_address,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_do_ioctl = mtk_do_ioctl,
- .ndo_change_mtu = mtk_change_mtu,
- .ndo_tx_timeout = mtk_tx_timeout,
- .ndo_get_stats64 = mtk_get_stats64,
- .ndo_vlan_rx_add_vid = mtk_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = mtk_vlan_rx_kill_vid,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = mtk_poll_controller,
-#endif
-};
-
-static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
-{
- struct mtk_mac *mac;
- const __be32 *_id = of_get_property(np, "reg", NULL);
- int id, err;
-
- if (!_id) {
- dev_err(eth->dev, "missing mac id\n");
- return -EINVAL;
- }
- id = be32_to_cpup(_id);
- if (id >= eth->soc->mac_count || eth->netdev[id]) {
- dev_err(eth->dev, "%d is not a valid mac id\n", id);
- return -EINVAL;
- }
-
- eth->netdev[id] = alloc_etherdev(sizeof(*mac));
- if (!eth->netdev[id]) {
- dev_err(eth->dev, "alloc_etherdev failed\n");
- return -ENOMEM;
- }
- mac = netdev_priv(eth->netdev[id]);
- eth->mac[id] = mac;
- mac->id = id;
- mac->hw = eth;
- mac->of_node = np;
- INIT_WORK(&mac->pending_work, mtk_pending_work);
-
- if (mtk_reg_table[MTK_REG_MTK_COUNTER_BASE]) {
- mac->hw_stats = devm_kzalloc(eth->dev,
- sizeof(*mac->hw_stats),
- GFP_KERNEL);
- if (!mac->hw_stats) {
- err = -ENOMEM;
- goto free_netdev;
- }
- spin_lock_init(&mac->hw_stats->stats_lock);
- mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
- }
-
- SET_NETDEV_DEV(eth->netdev[id], eth->dev);
- eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
- eth->netdev[id]->base_addr = (unsigned long)eth->base;
-
- if (eth->soc->init_data)
- eth->soc->init_data(eth->soc, eth->netdev[id]);
-
- eth->netdev[id]->vlan_features = eth->soc->hw_features &
- ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
- eth->netdev[id]->features |= eth->soc->hw_features;
-
- if (mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE])
- eth->netdev[id]->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-
- mtk_set_ethtool_ops(eth->netdev[id]);
-
- err = register_netdev(eth->netdev[id]);
- if (err) {
- dev_err(eth->dev, "error bringing up device\n");
- err = -ENOMEM;
- goto free_netdev;
- }
- eth->netdev[id]->irq = eth->irq;
- netif_info(eth, probe, eth->netdev[id],
- "mediatek frame engine at 0x%08lx, irq %d\n",
- eth->netdev[id]->base_addr, eth->netdev[id]->irq);
-
- return 0;
-
-free_netdev:
- free_netdev(eth->netdev[id]);
- return err;
-}
-
-static int mtk_probe(struct platform_device *pdev)
-{
- struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- const struct of_device_id *match;
- struct device_node *mac_np;
- struct mtk_soc_data *soc;
- struct mtk_eth *eth;
- struct clk *sysclk;
- int err;
-
- device_reset(&pdev->dev);
-
- match = of_match_device(of_mtk_match, &pdev->dev);
- soc = (struct mtk_soc_data *)match->data;
-
- if (soc->reg_table)
- mtk_reg_table = soc->reg_table;
-
- eth = devm_kzalloc(&pdev->dev, sizeof(*eth), GFP_KERNEL);
- if (!eth)
- return -ENOMEM;
-
- eth->base = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(eth->base))
- return PTR_ERR(eth->base);
-
- spin_lock_init(&eth->page_lock);
-
- eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
- "mediatek,ethsys");
- if (IS_ERR(eth->ethsys))
- return PTR_ERR(eth->ethsys);
-
- eth->irq = platform_get_irq(pdev, 0);
- if (eth->irq < 0) {
- dev_err(&pdev->dev, "no IRQ resource found\n");
- return -ENXIO;
- }
-
- sysclk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(sysclk)) {
- dev_err(&pdev->dev,
- "the clock is not defined in the devicetree\n");
- return -ENXIO;
- }
- eth->sysclk = clk_get_rate(sysclk);
-
- eth->switch_np = of_parse_phandle(pdev->dev.of_node,
- "mediatek,switch", 0);
- if (soc->has_switch && !eth->switch_np) {
- dev_err(&pdev->dev, "failed to read switch phandle\n");
- return -ENODEV;
- }
-
- eth->dev = &pdev->dev;
- eth->soc = soc;
- eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
-
- err = mtk_init_hw(eth);
- if (err)
- return err;
-
- if (eth->soc->mac_count > 1) {
- for_each_child_of_node(pdev->dev.of_node, mac_np) {
- if (!of_device_is_compatible(mac_np,
- "mediatek,eth-mac"))
- continue;
-
- if (!of_device_is_available(mac_np))
- continue;
-
- err = mtk_add_mac(eth, mac_np);
- if (err)
- goto err_free_dev;
- }
-
- init_dummy_netdev(&eth->dummy_dev);
- netif_napi_add(&eth->dummy_dev, &eth->rx_napi, mtk_poll,
- soc->napi_weight);
- } else {
- err = mtk_add_mac(eth, pdev->dev.of_node);
- if (err)
- goto err_free_dev;
- netif_napi_add(eth->netdev[0], &eth->rx_napi, mtk_poll,
- soc->napi_weight);
- }
-
- platform_set_drvdata(pdev, eth);
-
- return 0;
-
-err_free_dev:
- mtk_cleanup(eth);
- return err;
-}
-
-static int mtk_remove(struct platform_device *pdev)
-{
- struct mtk_eth *eth = platform_get_drvdata(pdev);
-
- netif_napi_del(&eth->rx_napi);
- mtk_cleanup(eth);
- platform_set_drvdata(pdev, NULL);
-
- return 0;
-}
-
-static struct platform_driver mtk_driver = {
- .probe = mtk_probe,
- .remove = mtk_remove,
- .driver = {
- .name = "mtk_soc_eth",
- .of_match_table = of_mtk_match,
- },
-};
-
-module_platform_driver(mtk_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
-MODULE_DESCRIPTION("Ethernet driver for MediaTek SoC");
diff --git a/drivers/staging/mt7621-eth/mtk_eth_soc.h b/drivers/staging/mt7621-eth/mtk_eth_soc.h
deleted file mode 100644
index e6ed80433f49..000000000000
--- a/drivers/staging/mt7621-eth/mtk_eth_soc.h
+++ /dev/null
@@ -1,716 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#ifndef MTK_ETH_H
-#define MTK_ETH_H
-
-#include <linux/mii.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
-#include <linux/dma-mapping.h>
-#include <linux/phy.h>
-#include <linux/ethtool.h>
-#include <linux/version.h>
-#include <linux/atomic.h>
-
-/* these registers have different offsets depending on the SoC. we use a lookup
- * table for these
- */
-enum mtk_reg {
- MTK_REG_PDMA_GLO_CFG = 0,
- MTK_REG_PDMA_RST_CFG,
- MTK_REG_DLY_INT_CFG,
- MTK_REG_TX_BASE_PTR0,
- MTK_REG_TX_MAX_CNT0,
- MTK_REG_TX_CTX_IDX0,
- MTK_REG_TX_DTX_IDX0,
- MTK_REG_RX_BASE_PTR0,
- MTK_REG_RX_MAX_CNT0,
- MTK_REG_RX_CALC_IDX0,
- MTK_REG_RX_DRX_IDX0,
- MTK_REG_MTK_INT_ENABLE,
- MTK_REG_MTK_INT_STATUS,
- MTK_REG_MTK_DMA_VID_BASE,
- MTK_REG_MTK_COUNTER_BASE,
- MTK_REG_MTK_RST_GL,
- MTK_REG_MTK_INT_STATUS2,
- MTK_REG_COUNT
-};
-
-/* delayed interrupt bits */
-#define MTK_DELAY_EN_INT 0x80
-#define MTK_DELAY_MAX_INT 0x04
-#define MTK_DELAY_MAX_TOUT 0x04
-#define MTK_DELAY_TIME 20
-#define MTK_DELAY_CHAN (((MTK_DELAY_EN_INT | MTK_DELAY_MAX_INT) << 8) \
- | MTK_DELAY_MAX_TOUT)
-#define MTK_DELAY_INIT ((MTK_DELAY_CHAN << 16) | MTK_DELAY_CHAN)
-#define MTK_PSE_FQFC_CFG_INIT 0x80504000
-#define MTK_PSE_FQFC_CFG_256Q 0xff908000
-
-/* interrupt bits */
-#define MTK_CNT_PPE_AF BIT(31)
-#define MTK_CNT_GDM_AF BIT(29)
-#define MTK_PSE_P2_FC BIT(26)
-#define MTK_PSE_BUF_DROP BIT(24)
-#define MTK_GDM_OTHER_DROP BIT(23)
-#define MTK_PSE_P1_FC BIT(22)
-#define MTK_PSE_P0_FC BIT(21)
-#define MTK_PSE_FQ_EMPTY BIT(20)
-#define MTK_GE1_STA_CHG BIT(18)
-#define MTK_TX_COHERENT BIT(17)
-#define MTK_RX_COHERENT BIT(16)
-#define MTK_TX_DONE_INT3 BIT(11)
-#define MTK_TX_DONE_INT2 BIT(10)
-#define MTK_TX_DONE_INT1 BIT(9)
-#define MTK_TX_DONE_INT0 BIT(8)
-#define MTK_RX_DONE_INT0 BIT(2)
-#define MTK_TX_DLY_INT BIT(1)
-#define MTK_RX_DLY_INT BIT(0)
-
-#define MTK_RX_DONE_INT MTK_RX_DONE_INT0
-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
-
-#define RT5350_RX_DLY_INT BIT(30)
-#define RT5350_TX_DLY_INT BIT(28)
-#define RT5350_RX_DONE_INT1 BIT(17)
-#define RT5350_RX_DONE_INT0 BIT(16)
-#define RT5350_TX_DONE_INT3 BIT(3)
-#define RT5350_TX_DONE_INT2 BIT(2)
-#define RT5350_TX_DONE_INT1 BIT(1)
-#define RT5350_TX_DONE_INT0 BIT(0)
-
-#define RT5350_RX_DONE_INT (RT5350_RX_DONE_INT0 | RT5350_RX_DONE_INT1)
-#define RT5350_TX_DONE_INT (RT5350_TX_DONE_INT0 | RT5350_TX_DONE_INT1 | \
- RT5350_TX_DONE_INT2 | RT5350_TX_DONE_INT3)
-
-/* registers */
-#define MTK_GDMA_OFFSET 0x0020
-#define MTK_PSE_OFFSET 0x0040
-#define MTK_GDMA2_OFFSET 0x0060
-#define MTK_CDMA_OFFSET 0x0080
-#define MTK_DMA_VID0 0x00a8
-#define MTK_PDMA_OFFSET 0x0100
-#define MTK_PPE_OFFSET 0x0200
-#define MTK_CMTABLE_OFFSET 0x0400
-#define MTK_POLICYTABLE_OFFSET 0x1000
-
-#define MT7621_GDMA_OFFSET 0x0500
-#define MT7620_GDMA_OFFSET 0x0600
-
-#define RT5350_PDMA_OFFSET 0x0800
-#define RT5350_SDM_OFFSET 0x0c00
-
-#define MTK_MDIO_ACCESS 0x00
-#define MTK_MDIO_CFG 0x04
-#define MTK_GLO_CFG 0x08
-#define MTK_RST_GL 0x0C
-#define MTK_INT_STATUS 0x10
-#define MTK_INT_ENABLE 0x14
-#define MTK_MDIO_CFG2 0x18
-#define MTK_FOC_TS_T 0x1C
-
-#define MTK_GDMA1_FWD_CFG (MTK_GDMA_OFFSET + 0x00)
-#define MTK_GDMA1_SCH_CFG (MTK_GDMA_OFFSET + 0x04)
-#define MTK_GDMA1_SHPR_CFG (MTK_GDMA_OFFSET + 0x08)
-#define MTK_GDMA1_MAC_ADRL (MTK_GDMA_OFFSET + 0x0C)
-#define MTK_GDMA1_MAC_ADRH (MTK_GDMA_OFFSET + 0x10)
-
-#define MTK_GDMA2_FWD_CFG (MTK_GDMA2_OFFSET + 0x00)
-#define MTK_GDMA2_SCH_CFG (MTK_GDMA2_OFFSET + 0x04)
-#define MTK_GDMA2_SHPR_CFG (MTK_GDMA2_OFFSET + 0x08)
-#define MTK_GDMA2_MAC_ADRL (MTK_GDMA2_OFFSET + 0x0C)
-#define MTK_GDMA2_MAC_ADRH (MTK_GDMA2_OFFSET + 0x10)
-
-#define MTK_PSE_FQ_CFG (MTK_PSE_OFFSET + 0x00)
-#define MTK_CDMA_FC_CFG (MTK_PSE_OFFSET + 0x04)
-#define MTK_GDMA1_FC_CFG (MTK_PSE_OFFSET + 0x08)
-#define MTK_GDMA2_FC_CFG (MTK_PSE_OFFSET + 0x0C)
-
-#define MTK_CDMA_CSG_CFG (MTK_CDMA_OFFSET + 0x00)
-#define MTK_CDMA_SCH_CFG (MTK_CDMA_OFFSET + 0x04)
-
-#define MT7621_GDMA_FWD_CFG(x) (MT7621_GDMA_OFFSET + (x * 0x1000))
-
-/* FIXME this might be different for different SOCs */
-#define MT7620_GDMA1_FWD_CFG (MT7621_GDMA_OFFSET + 0x00)
-
-#define RT5350_TX_BASE_PTR0 (RT5350_PDMA_OFFSET + 0x00)
-#define RT5350_TX_MAX_CNT0 (RT5350_PDMA_OFFSET + 0x04)
-#define RT5350_TX_CTX_IDX0 (RT5350_PDMA_OFFSET + 0x08)
-#define RT5350_TX_DTX_IDX0 (RT5350_PDMA_OFFSET + 0x0C)
-#define RT5350_TX_BASE_PTR1 (RT5350_PDMA_OFFSET + 0x10)
-#define RT5350_TX_MAX_CNT1 (RT5350_PDMA_OFFSET + 0x14)
-#define RT5350_TX_CTX_IDX1 (RT5350_PDMA_OFFSET + 0x18)
-#define RT5350_TX_DTX_IDX1 (RT5350_PDMA_OFFSET + 0x1C)
-#define RT5350_TX_BASE_PTR2 (RT5350_PDMA_OFFSET + 0x20)
-#define RT5350_TX_MAX_CNT2 (RT5350_PDMA_OFFSET + 0x24)
-#define RT5350_TX_CTX_IDX2 (RT5350_PDMA_OFFSET + 0x28)
-#define RT5350_TX_DTX_IDX2 (RT5350_PDMA_OFFSET + 0x2C)
-#define RT5350_TX_BASE_PTR3 (RT5350_PDMA_OFFSET + 0x30)
-#define RT5350_TX_MAX_CNT3 (RT5350_PDMA_OFFSET + 0x34)
-#define RT5350_TX_CTX_IDX3 (RT5350_PDMA_OFFSET + 0x38)
-#define RT5350_TX_DTX_IDX3 (RT5350_PDMA_OFFSET + 0x3C)
-#define RT5350_RX_BASE_PTR0 (RT5350_PDMA_OFFSET + 0x100)
-#define RT5350_RX_MAX_CNT0 (RT5350_PDMA_OFFSET + 0x104)
-#define RT5350_RX_CALC_IDX0 (RT5350_PDMA_OFFSET + 0x108)
-#define RT5350_RX_DRX_IDX0 (RT5350_PDMA_OFFSET + 0x10C)
-#define RT5350_RX_BASE_PTR1 (RT5350_PDMA_OFFSET + 0x110)
-#define RT5350_RX_MAX_CNT1 (RT5350_PDMA_OFFSET + 0x114)
-#define RT5350_RX_CALC_IDX1 (RT5350_PDMA_OFFSET + 0x118)
-#define RT5350_RX_DRX_IDX1 (RT5350_PDMA_OFFSET + 0x11C)
-#define RT5350_PDMA_GLO_CFG (RT5350_PDMA_OFFSET + 0x204)
-#define RT5350_PDMA_RST_CFG (RT5350_PDMA_OFFSET + 0x208)
-#define RT5350_DLY_INT_CFG (RT5350_PDMA_OFFSET + 0x20c)
-#define RT5350_MTK_INT_STATUS (RT5350_PDMA_OFFSET + 0x220)
-#define RT5350_MTK_INT_ENABLE (RT5350_PDMA_OFFSET + 0x228)
-#define RT5350_PDMA_SCH_CFG (RT5350_PDMA_OFFSET + 0x280)
-
-#define MTK_PDMA_GLO_CFG (MTK_PDMA_OFFSET + 0x00)
-#define MTK_PDMA_RST_CFG (MTK_PDMA_OFFSET + 0x04)
-#define MTK_PDMA_SCH_CFG (MTK_PDMA_OFFSET + 0x08)
-#define MTK_DLY_INT_CFG (MTK_PDMA_OFFSET + 0x0C)
-#define MTK_TX_BASE_PTR0 (MTK_PDMA_OFFSET + 0x10)
-#define MTK_TX_MAX_CNT0 (MTK_PDMA_OFFSET + 0x14)
-#define MTK_TX_CTX_IDX0 (MTK_PDMA_OFFSET + 0x18)
-#define MTK_TX_DTX_IDX0 (MTK_PDMA_OFFSET + 0x1C)
-#define MTK_TX_BASE_PTR1 (MTK_PDMA_OFFSET + 0x20)
-#define MTK_TX_MAX_CNT1 (MTK_PDMA_OFFSET + 0x24)
-#define MTK_TX_CTX_IDX1 (MTK_PDMA_OFFSET + 0x28)
-#define MTK_TX_DTX_IDX1 (MTK_PDMA_OFFSET + 0x2C)
-#define MTK_RX_BASE_PTR0 (MTK_PDMA_OFFSET + 0x30)
-#define MTK_RX_MAX_CNT0 (MTK_PDMA_OFFSET + 0x34)
-#define MTK_RX_CALC_IDX0 (MTK_PDMA_OFFSET + 0x38)
-#define MTK_RX_DRX_IDX0 (MTK_PDMA_OFFSET + 0x3C)
-#define MTK_TX_BASE_PTR2 (MTK_PDMA_OFFSET + 0x40)
-#define MTK_TX_MAX_CNT2 (MTK_PDMA_OFFSET + 0x44)
-#define MTK_TX_CTX_IDX2 (MTK_PDMA_OFFSET + 0x48)
-#define MTK_TX_DTX_IDX2 (MTK_PDMA_OFFSET + 0x4C)
-#define MTK_TX_BASE_PTR3 (MTK_PDMA_OFFSET + 0x50)
-#define MTK_TX_MAX_CNT3 (MTK_PDMA_OFFSET + 0x54)
-#define MTK_TX_CTX_IDX3 (MTK_PDMA_OFFSET + 0x58)
-#define MTK_TX_DTX_IDX3 (MTK_PDMA_OFFSET + 0x5C)
-#define MTK_RX_BASE_PTR1 (MTK_PDMA_OFFSET + 0x60)
-#define MTK_RX_MAX_CNT1 (MTK_PDMA_OFFSET + 0x64)
-#define MTK_RX_CALC_IDX1 (MTK_PDMA_OFFSET + 0x68)
-#define MTK_RX_DRX_IDX1 (MTK_PDMA_OFFSET + 0x6C)
-
-/* Switch DMA configuration */
-#define RT5350_SDM_CFG (RT5350_SDM_OFFSET + 0x00)
-#define RT5350_SDM_RRING (RT5350_SDM_OFFSET + 0x04)
-#define RT5350_SDM_TRING (RT5350_SDM_OFFSET + 0x08)
-#define RT5350_SDM_MAC_ADRL (RT5350_SDM_OFFSET + 0x0C)
-#define RT5350_SDM_MAC_ADRH (RT5350_SDM_OFFSET + 0x10)
-#define RT5350_SDM_TPCNT (RT5350_SDM_OFFSET + 0x100)
-#define RT5350_SDM_TBCNT (RT5350_SDM_OFFSET + 0x104)
-#define RT5350_SDM_RPCNT (RT5350_SDM_OFFSET + 0x108)
-#define RT5350_SDM_RBCNT (RT5350_SDM_OFFSET + 0x10C)
-#define RT5350_SDM_CS_ERR (RT5350_SDM_OFFSET + 0x110)
-
-#define RT5350_SDM_ICS_EN BIT(16)
-#define RT5350_SDM_TCS_EN BIT(17)
-#define RT5350_SDM_UCS_EN BIT(18)
-
-/* QDMA registers */
-#define MTK_QTX_CFG(x) (0x1800 + (x * 0x10))
-#define MTK_QTX_SCH(x) (0x1804 + (x * 0x10))
-#define MTK_QRX_BASE_PTR0 0x1900
-#define MTK_QRX_MAX_CNT0 0x1904
-#define MTK_QRX_CRX_IDX0 0x1908
-#define MTK_QRX_DRX_IDX0 0x190C
-#define MTK_QDMA_GLO_CFG 0x1A04
-#define MTK_QDMA_RST_IDX 0x1A08
-#define MTK_QDMA_DELAY_INT 0x1A0C
-#define MTK_QDMA_FC_THRES 0x1A10
-#define MTK_QMTK_INT_STATUS 0x1A18
-#define MTK_QMTK_INT_ENABLE 0x1A1C
-#define MTK_QDMA_HRED2 0x1A44
-
-#define MTK_QTX_CTX_PTR 0x1B00
-#define MTK_QTX_DTX_PTR 0x1B04
-
-#define MTK_QTX_CRX_PTR 0x1B10
-#define MTK_QTX_DRX_PTR 0x1B14
-
-#define MTK_QDMA_FQ_HEAD 0x1B20
-#define MTK_QDMA_FQ_TAIL 0x1B24
-#define MTK_QDMA_FQ_CNT 0x1B28
-#define MTK_QDMA_FQ_BLEN 0x1B2C
-
-#define QDMA_PAGE_SIZE 2048
-#define QDMA_TX_OWNER_CPU BIT(31)
-#define QDMA_TX_SWC BIT(14)
-#define TX_QDMA_SDL(_x) (((_x) & 0x3fff) << 16)
-#define QDMA_RES_THRES 4
-
-/* MDIO_CFG register bits */
-#define MTK_MDIO_CFG_AUTO_POLL_EN BIT(29)
-#define MTK_MDIO_CFG_GP1_BP_EN BIT(16)
-#define MTK_MDIO_CFG_GP1_FRC_EN BIT(15)
-#define MTK_MDIO_CFG_GP1_SPEED_10 (0 << 13)
-#define MTK_MDIO_CFG_GP1_SPEED_100 (1 << 13)
-#define MTK_MDIO_CFG_GP1_SPEED_1000 (2 << 13)
-#define MTK_MDIO_CFG_GP1_DUPLEX BIT(12)
-#define MTK_MDIO_CFG_GP1_FC_TX BIT(11)
-#define MTK_MDIO_CFG_GP1_FC_RX BIT(10)
-#define MTK_MDIO_CFG_GP1_LNK_DWN BIT(9)
-#define MTK_MDIO_CFG_GP1_AN_FAIL BIT(8)
-#define MTK_MDIO_CFG_MDC_CLK_DIV_1 (0 << 6)
-#define MTK_MDIO_CFG_MDC_CLK_DIV_2 (1 << 6)
-#define MTK_MDIO_CFG_MDC_CLK_DIV_4 (2 << 6)
-#define MTK_MDIO_CFG_MDC_CLK_DIV_8 (3 << 6)
-#define MTK_MDIO_CFG_TURBO_MII_FREQ BIT(5)
-#define MTK_MDIO_CFG_TURBO_MII_MODE BIT(4)
-#define MTK_MDIO_CFG_RX_CLK_SKEW_0 (0 << 2)
-#define MTK_MDIO_CFG_RX_CLK_SKEW_200 (1 << 2)
-#define MTK_MDIO_CFG_RX_CLK_SKEW_400 (2 << 2)
-#define MTK_MDIO_CFG_RX_CLK_SKEW_INV (3 << 2)
-#define MTK_MDIO_CFG_TX_CLK_SKEW_0 0
-#define MTK_MDIO_CFG_TX_CLK_SKEW_200 1
-#define MTK_MDIO_CFG_TX_CLK_SKEW_400 2
-#define MTK_MDIO_CFG_TX_CLK_SKEW_INV 3
-
-/* uni-cast port */
-#define MTK_GDM1_JMB_LEN_MASK 0xf
-#define MTK_GDM1_JMB_LEN_SHIFT 28
-#define MTK_GDM1_ICS_EN BIT(22)
-#define MTK_GDM1_TCS_EN BIT(21)
-#define MTK_GDM1_UCS_EN BIT(20)
-#define MTK_GDM1_JMB_EN BIT(19)
-#define MTK_GDM1_STRPCRC BIT(16)
-#define MTK_GDM1_UFRC_P_CPU (0 << 12)
-#define MTK_GDM1_UFRC_P_GDMA1 (1 << 12)
-#define MTK_GDM1_UFRC_P_PPE (6 << 12)
-
-/* checksums */
-#define MTK_ICS_GEN_EN BIT(2)
-#define MTK_UCS_GEN_EN BIT(1)
-#define MTK_TCS_GEN_EN BIT(0)
-
-/* dma mode */
-#define MTK_PDMA BIT(0)
-#define MTK_QDMA BIT(1)
-#define MTK_PDMA_RX_QDMA_TX (MTK_PDMA | MTK_QDMA)
-
-/* dma ring */
-#define MTK_PST_DRX_IDX0 BIT(16)
-#define MTK_PST_DTX_IDX3 BIT(3)
-#define MTK_PST_DTX_IDX2 BIT(2)
-#define MTK_PST_DTX_IDX1 BIT(1)
-#define MTK_PST_DTX_IDX0 BIT(0)
-
-#define MTK_RX_2B_OFFSET BIT(31)
-#define MTK_TX_WB_DDONE BIT(6)
-#define MTK_RX_DMA_BUSY BIT(3)
-#define MTK_TX_DMA_BUSY BIT(1)
-#define MTK_RX_DMA_EN BIT(2)
-#define MTK_TX_DMA_EN BIT(0)
-
-#define MTK_PDMA_SIZE_4DWORDS (0 << 4)
-#define MTK_PDMA_SIZE_8DWORDS (1 << 4)
-#define MTK_PDMA_SIZE_16DWORDS (2 << 4)
-
-#define MTK_US_CYC_CNT_MASK 0xff
-#define MTK_US_CYC_CNT_SHIFT 0x8
-#define MTK_US_CYC_CNT_DIVISOR 1000000
-
-/* PDMA descriptor rxd2 */
-#define RX_DMA_DONE BIT(31)
-#define RX_DMA_LSO BIT(30)
-#define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16)
-#define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff)
-#define RX_DMA_TAG BIT(15)
-
-/* PDMA descriptor rxd3 */
-#define RX_DMA_TPID(_x) (((_x) >> 16) & 0xffff)
-#define RX_DMA_VID(_x) ((_x) & 0xfff)
-
-/* PDMA descriptor rxd4 */
-#define RX_DMA_L4VALID BIT(30)
-#define RX_DMA_FPORT_SHIFT 19
-#define RX_DMA_FPORT_MASK 0x7
-
-struct mtk_rx_dma {
- unsigned int rxd1;
- unsigned int rxd2;
- unsigned int rxd3;
- unsigned int rxd4;
-} __packed __aligned(4);
-
-/* PDMA tx descriptor bits */
-#define TX_DMA_BUF_LEN 0x3fff
-#define TX_DMA_PLEN0_MASK (TX_DMA_BUF_LEN << 16)
-#define TX_DMA_PLEN0(_x) (((_x) & TX_DMA_BUF_LEN) << 16)
-#define TX_DMA_PLEN1(_x) ((_x) & TX_DMA_BUF_LEN)
-#define TX_DMA_GET_PLEN0(_x) (((_x) >> 16) & TX_DMA_BUF_LEN)
-#define TX_DMA_GET_PLEN1(_x) ((_x) & TX_DMA_BUF_LEN)
-#define TX_DMA_LS1 BIT(14)
-#define TX_DMA_LS0 BIT(30)
-#define TX_DMA_DONE BIT(31)
-#define TX_DMA_FPORT_SHIFT 25
-#define TX_DMA_FPORT_MASK 0x7
-#define TX_DMA_INS_VLAN_MT7621 BIT(16)
-#define TX_DMA_INS_VLAN BIT(7)
-#define TX_DMA_INS_PPPOE BIT(12)
-#define TX_DMA_TAG BIT(15)
-#define TX_DMA_TAG_MASK BIT(15)
-#define TX_DMA_QN(_x) ((_x) << 16)
-#define TX_DMA_PN(_x) ((_x) << 24)
-#define TX_DMA_QN_MASK TX_DMA_QN(0x7)
-#define TX_DMA_PN_MASK TX_DMA_PN(0x7)
-#define TX_DMA_UDF BIT(20)
-#define TX_DMA_CHKSUM (0x7 << 29)
-#define TX_DMA_TSO BIT(28)
-#define TX_DMA_DESP4_DEF (TX_DMA_QN(3) | TX_DMA_PN(1))
-
-/* frame engine counters */
-#define MTK_PPE_AC_BCNT0 (MTK_CMTABLE_OFFSET + 0x00)
-#define MTK_GDMA1_TX_GBCNT (MTK_CMTABLE_OFFSET + 0x300)
-#define MTK_GDMA2_TX_GBCNT (MTK_GDMA1_TX_GBCNT + 0x40)
-
-/* phy device flags */
-#define MTK_PHY_FLAG_PORT BIT(0)
-#define MTK_PHY_FLAG_ATTACH BIT(1)
-
-struct mtk_tx_dma {
- unsigned int txd1;
- unsigned int txd2;
- unsigned int txd3;
- unsigned int txd4;
-} __packed __aligned(4);
-
-struct mtk_eth;
-struct mtk_mac;
-
-/* manage the attached phys */
-struct mtk_phy {
- spinlock_t lock;
-
- struct phy_device *phy[8];
- struct device_node *phy_node[8];
- const __be32 *phy_fixed[8];
- int duplex[8];
- int speed[8];
- int tx_fc[8];
- int rx_fc[8];
- int (*connect)(struct mtk_mac *mac);
- void (*disconnect)(struct mtk_mac *mac);
- void (*start)(struct mtk_mac *mac);
- void (*stop)(struct mtk_mac *mac);
-};
-
-/* struct mtk_soc_data - the structure that holds the SoC specific data
- * @reg_table: Some of the legacy registers changed their location
- * over time. Their offsets are stored in this table
- *
- * @init_data: Some features depend on the silicon revision. This
- * callback allows runtime modification of the content of
- * this struct
- * @reset_fe: This callback is used to trigger the reset of the frame
- * engine
- * @set_mac: This callback is used to set the unicast mac address
- * filter
- * @fwd_config: This callback is used to setup the forward config
- * register of the MAC
- * @switch_init: This callback is used to bring up the switch core
- * @port_init: Some SoCs have ports that can be router to a switch port
- * or an external PHY. This callback is used to setup these
- * ports.
- * @has_carrier: This callback allows driver to check if there is a cable
- * attached.
- * @mdio_init: This callbck is used to setup the MDIO bus if one is
- * present
- * @mdio_cleanup: This callback is used to cleanup the MDIO state.
- * @mdio_write: This callback is used to write data to the MDIO bus.
- * @mdio_read: This callback is used to write data to the MDIO bus.
- * @mdio_adjust_link: This callback is used to apply the PHY settings.
- * @piac_offset: the PIAC register has a different different base offset
- * @hw_features: feature set depends on the SoC type
- * @dma_ring_size: allow GBit SoCs to set bigger rings than FE SoCs
- * @napi_weight: allow GBit SoCs to set bigger napi weight than FE SoCs
- * @dma_type: SoCs is PDMA, QDMA or a mix of the 2
- * @pdma_glo_cfg: the default DMA configuration
- * @rx_int: the TX interrupt bits used by the SoC
- * @tx_int: the TX interrupt bits used by the SoC
- * @status_int: the Status interrupt bits used by the SoC
- * @checksum_bit: the bits used to turn on HW checksumming
- * @txd4: default value of the TXD4 descriptor
- * @mac_count: the number of MACs that the SoC has
- * @new_stats: there is a old and new way to read hardware stats
- * registers
- * @jumbo_frame: does the SoC support jumbo frames ?
- * @rx_2b_offset: tell the rx dma to offset the data by 2 bytes
- * @rx_sg_dma: scatter gather support
- * @padding_64b enable 64 bit padding
- * @padding_bug: rt2880 has a padding bug
- * @has_switch: does the SoC have a built-in switch
- *
- * Although all of the supported SoCs share the same basic functionality, there
- * are several SoC specific functions and features that we need to support. This
- * struct holds the SoC specific data so that the common core can figure out
- * how to setup and use these differences.
- */
-struct mtk_soc_data {
- const u16 *reg_table;
-
- void (*init_data)(struct mtk_soc_data *data, struct net_device *netdev);
- void (*reset_fe)(struct mtk_eth *eth);
- void (*set_mac)(struct mtk_mac *mac, unsigned char *macaddr);
- int (*fwd_config)(struct mtk_eth *eth);
- int (*switch_init)(struct mtk_eth *eth);
- void (*port_init)(struct mtk_eth *eth, struct mtk_mac *mac,
- struct device_node *port);
- int (*has_carrier)(struct mtk_eth *eth);
- int (*mdio_init)(struct mtk_eth *eth);
- void (*mdio_cleanup)(struct mtk_eth *eth);
- int (*mdio_write)(struct mii_bus *bus, int phy_addr, int phy_reg,
- u16 val);
- int (*mdio_read)(struct mii_bus *bus, int phy_addr, int phy_reg);
- void (*mdio_adjust_link)(struct mtk_eth *eth, int port);
- u32 piac_offset;
- netdev_features_t hw_features;
- u32 dma_ring_size;
- u32 napi_weight;
- u32 dma_type;
- u32 pdma_glo_cfg;
- u32 rx_int;
- u32 tx_int;
- u32 status_int;
- u32 checksum_bit;
- u32 txd4;
- u32 mac_count;
-
- u32 new_stats:1;
- u32 jumbo_frame:1;
- u32 rx_2b_offset:1;
- u32 rx_sg_dma:1;
- u32 padding_64b:1;
- u32 padding_bug:1;
- u32 has_switch:1;
-};
-
-#define MTK_STAT_OFFSET 0x40
-
-/* struct mtk_hw_stats - the structure that holds the traffic statistics.
- * @stats_lock: make sure that stats operations are atomic
- * @reg_offset: the status register offset of the SoC
- * @syncp: the refcount
- *
- * All of the supported SoCs have hardware counters for traffic statstics.
- * Whenever the status IRQ triggers we can read the latest stats from these
- * counters and store them in this struct.
- */
-struct mtk_hw_stats {
- spinlock_t stats_lock;
- u32 reg_offset;
- struct u64_stats_sync syncp;
-
- u64 tx_bytes;
- u64 tx_packets;
- u64 tx_skip;
- u64 tx_collisions;
- u64 rx_bytes;
- u64 rx_packets;
- u64 rx_overflow;
- u64 rx_fcs_errors;
- u64 rx_short_errors;
- u64 rx_long_errors;
- u64 rx_checksum_errors;
- u64 rx_flow_control_packets;
-};
-
-/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
- * memory was allocated so that it can be freed properly
- */
-enum mtk_tx_flags {
- MTK_TX_FLAGS_SINGLE0 = 0x01,
- MTK_TX_FLAGS_PAGE0 = 0x02,
- MTK_TX_FLAGS_PAGE1 = 0x04,
-};
-
-/* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at
- * by the TX descriptor s
- * @skb: The SKB pointer of the packet being sent
- * @dma_addr0: The base addr of the first segment
- * @dma_len0: The length of the first segment
- * @dma_addr1: The base addr of the second segment
- * @dma_len1: The length of the second segment
- */
-struct mtk_tx_buf {
- struct sk_buff *skb;
- u32 flags;
- DEFINE_DMA_UNMAP_ADDR(dma_addr0);
- DEFINE_DMA_UNMAP_LEN(dma_len0);
- DEFINE_DMA_UNMAP_ADDR(dma_addr1);
- DEFINE_DMA_UNMAP_LEN(dma_len1);
-};
-
-/* struct mtk_tx_ring - This struct holds info describing a TX ring
- * @tx_dma: The descriptor ring
- * @tx_buf: The memory pointed at by the ring
- * @tx_phys: The physical addr of tx_buf
- * @tx_next_free: Pointer to the next free descriptor
- * @tx_last_free: Pointer to the last free descriptor
- * @tx_thresh: The threshold of minimum amount of free descriptors
- * @tx_map: Callback to map a new packet into the ring
- * @tx_poll: Callback for the housekeeping function
- * @tx_clean: Callback for the cleanup function
- * @tx_ring_size: How many descriptors are in the ring
- * @tx_free_idx: The index of th next free descriptor
- * @tx_next_idx: QDMA uses a linked list. This element points to the next
- * free descriptor in the list
- * @tx_free_count: QDMA uses a linked list. Track how many free descriptors
- * are present
- */
-struct mtk_tx_ring {
- struct mtk_tx_dma *tx_dma;
- struct mtk_tx_buf *tx_buf;
- dma_addr_t tx_phys;
- struct mtk_tx_dma *tx_next_free;
- struct mtk_tx_dma *tx_last_free;
- u16 tx_thresh;
- int (*tx_map)(struct sk_buff *skb, struct net_device *dev, int tx_num,
- struct mtk_tx_ring *ring, bool gso);
- int (*tx_poll)(struct mtk_eth *eth, int budget, bool *tx_again);
- void (*tx_clean)(struct mtk_eth *eth);
-
- /* PDMA only */
- u16 tx_ring_size;
- u16 tx_free_idx;
-
- /* QDMA only */
- u16 tx_next_idx;
- atomic_t tx_free_count;
-};
-
-/* struct mtk_rx_ring - This struct holds info describing a RX ring
- * @rx_dma: The descriptor ring
- * @rx_data: The memory pointed at by the ring
- * @trx_phys: The physical addr of rx_buf
- * @rx_ring_size: How many descriptors are in the ring
- * @rx_buf_size: The size of each packet buffer
- * @rx_calc_idx: The current head of ring
- */
-struct mtk_rx_ring {
- struct mtk_rx_dma *rx_dma;
- u8 **rx_data;
- dma_addr_t rx_phys;
- u16 rx_ring_size;
- u16 frag_size;
- u16 rx_buf_size;
- u16 rx_calc_idx;
-};
-
-/* currently no SoC has more than 2 macs */
-#define MTK_MAX_DEVS 2
-
-/* struct mtk_eth - This is the main datasructure for holding the state
- * of the driver
- * @dev: The device pointer
- * @base: The mapped register i/o base
- * @page_lock: Make sure that register operations are atomic
- * @soc: pointer to our SoC specific data
- * @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a
- * dummy for NAPI to work
- * @netdev: The netdev instances
- * @mac: Each netdev is linked to a physical MAC
- * @switch_np: The phandle for the switch
- * @irq: The IRQ that we are using
- * @msg_enable: Ethtool msg level
- * @ysclk: The sysclk rate - neeed for calibration
- * @ethsys: The register map pointing at the range used to setup
- * MII modes
- * @dma_refcnt: track how many netdevs are using the DMA engine
- * @tx_ring: Pointer to the memore holding info about the TX ring
- * @rx_ring: Pointer to the memore holding info about the RX ring
- * @rx_napi: The NAPI struct
- * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring
- * @scratch_head: The scratch memory that scratch_ring points to.
- * @phy: Info about the attached PHYs
- * @mii_bus: If there is a bus we need to create an instance for it
- * @link: Track if the ports have a physical link
- * @sw_priv: Pointer to the switches private data
- * @vlan_map: RX VID tracking
- */
-
-struct mtk_eth {
- struct device *dev;
- void __iomem *base;
- spinlock_t page_lock;
- struct mtk_soc_data *soc;
- struct net_device dummy_dev;
- struct net_device *netdev[MTK_MAX_DEVS];
- struct mtk_mac *mac[MTK_MAX_DEVS];
- struct device_node *switch_np;
- int irq;
- u32 msg_enable;
- unsigned long sysclk;
- struct regmap *ethsys;
- atomic_t dma_refcnt;
- struct mtk_tx_ring tx_ring;
- struct mtk_rx_ring rx_ring[2];
- struct napi_struct rx_napi;
- struct mtk_tx_dma *scratch_ring;
- void *scratch_head;
- struct mtk_phy *phy;
- struct mii_bus *mii_bus;
- int link[8];
- void *sw_priv;
- unsigned long vlan_map;
-};
-
-/* struct mtk_mac - the structure that holds the info about the MACs of the
- * SoC
- * @id: The number of the MAC
- * @of_node: Our devicetree node
- * @hw: Backpointer to our main datastruture
- * @hw_stats: Packet statistics counter
- * @phy_dev: The attached PHY if available
- * @phy_flags: The PHYs flags
- * @pending_work: The workqueue used to reset the dma ring
- */
-struct mtk_mac {
- int id;
- struct device_node *of_node;
- struct mtk_eth *hw;
- struct mtk_hw_stats *hw_stats;
- struct phy_device *phy_dev;
- u32 phy_flags;
- struct work_struct pending_work;
-};
-
-/* the struct describing the SoC. these are declared in the soc_xyz.c files */
-extern const struct of_device_id of_mtk_match[];
-
-/* read the hardware status register */
-void mtk_stats_update_mac(struct mtk_mac *mac);
-
-/* default checksum setup handler */
-void mtk_reset(struct mtk_eth *eth, u32 reset_bits);
-
-/* register i/o wrappers */
-void mtk_w32(struct mtk_eth *eth, u32 val, unsigned int reg);
-u32 mtk_r32(struct mtk_eth *eth, unsigned int reg);
-
-/* default clock calibration handler */
-int mtk_set_clock_cycle(struct mtk_eth *eth);
-
-/* default checksum setup handler */
-void mtk_csum_config(struct mtk_eth *eth);
-
-/* default forward config handler */
-void mtk_fwd_config(struct mtk_eth *eth);
-
-#endif /* MTK_ETH_H */
diff --git a/drivers/staging/mt7621-eth/soc_mt7621.c b/drivers/staging/mt7621-eth/soc_mt7621.c
deleted file mode 100644
index 5d63b5d96f6b..000000000000
--- a/drivers/staging/mt7621-eth/soc_mt7621.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
- * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/if_vlan.h>
-#include <linux/of_net.h>
-
-#include <asm/mach-ralink/ralink_regs.h>
-
-#include "mtk_eth_soc.h"
-#include "gsw_mt7620.h"
-#include "mdio.h"
-
-#define MT7620_CDMA_CSG_CFG 0x400
-#define MT7621_CDMP_IG_CTRL (MT7620_CDMA_CSG_CFG + 0x00)
-#define MT7621_CDMP_EG_CTRL (MT7620_CDMA_CSG_CFG + 0x04)
-#define MT7621_RESET_FE BIT(6)
-#define MT7621_L4_VALID BIT(24)
-
-#define MT7621_TX_DMA_UDF BIT(19)
-
-#define CDMA_ICS_EN BIT(2)
-#define CDMA_UCS_EN BIT(1)
-#define CDMA_TCS_EN BIT(0)
-
-#define GDMA_ICS_EN BIT(22)
-#define GDMA_TCS_EN BIT(21)
-#define GDMA_UCS_EN BIT(20)
-
-/* frame engine counters */
-#define MT7621_REG_MIB_OFFSET 0x2000
-#define MT7621_PPE_AC_BCNT0 (MT7621_REG_MIB_OFFSET + 0x00)
-#define MT7621_GDM1_TX_GBCNT (MT7621_REG_MIB_OFFSET + 0x400)
-#define MT7621_GDM2_TX_GBCNT (MT7621_GDM1_TX_GBCNT + 0x40)
-
-#define GSW_REG_GDMA1_MAC_ADRL 0x508
-#define GSW_REG_GDMA1_MAC_ADRH 0x50C
-#define GSW_REG_GDMA2_MAC_ADRL 0x1508
-#define GSW_REG_GDMA2_MAC_ADRH 0x150C
-
-#define MT7621_MTK_RST_GL 0x04
-#define MT7620_MTK_INT_STATUS2 0x08
-
-/* MTK_INT_STATUS reg on mt7620 define CNT_GDM1_AF at BIT(29)
- * but after test it should be BIT(13).
- */
-#define MT7621_MTK_GDM1_AF BIT(28)
-#define MT7621_MTK_GDM2_AF BIT(29)
-
-static const u16 mt7621_reg_table[MTK_REG_COUNT] = {
- [MTK_REG_PDMA_GLO_CFG] = RT5350_PDMA_GLO_CFG,
- [MTK_REG_PDMA_RST_CFG] = RT5350_PDMA_RST_CFG,
- [MTK_REG_DLY_INT_CFG] = RT5350_DLY_INT_CFG,
- [MTK_REG_TX_BASE_PTR0] = RT5350_TX_BASE_PTR0,
- [MTK_REG_TX_MAX_CNT0] = RT5350_TX_MAX_CNT0,
- [MTK_REG_TX_CTX_IDX0] = RT5350_TX_CTX_IDX0,
- [MTK_REG_TX_DTX_IDX0] = RT5350_TX_DTX_IDX0,
- [MTK_REG_RX_BASE_PTR0] = RT5350_RX_BASE_PTR0,
- [MTK_REG_RX_MAX_CNT0] = RT5350_RX_MAX_CNT0,
- [MTK_REG_RX_CALC_IDX0] = RT5350_RX_CALC_IDX0,
- [MTK_REG_RX_DRX_IDX0] = RT5350_RX_DRX_IDX0,
- [MTK_REG_MTK_INT_ENABLE] = RT5350_MTK_INT_ENABLE,
- [MTK_REG_MTK_INT_STATUS] = RT5350_MTK_INT_STATUS,
- [MTK_REG_MTK_DMA_VID_BASE] = 0,
- [MTK_REG_MTK_COUNTER_BASE] = MT7621_GDM1_TX_GBCNT,
- [MTK_REG_MTK_RST_GL] = MT7621_MTK_RST_GL,
- [MTK_REG_MTK_INT_STATUS2] = MT7620_MTK_INT_STATUS2,
-};
-
-static void mt7621_mtk_reset(struct mtk_eth *eth)
-{
- mtk_reset(eth, MT7621_RESET_FE);
-}
-
-static int mt7621_fwd_config(struct mtk_eth *eth)
-{
- /* Setup GMAC1 only, there is no support for GMAC2 yet */
- mtk_w32(eth, mtk_r32(eth, MT7620_GDMA1_FWD_CFG) & ~0xffff,
- MT7620_GDMA1_FWD_CFG);
-
- /* Enable RX checksum */
- mtk_w32(eth, mtk_r32(eth, MT7620_GDMA1_FWD_CFG) | (GDMA_ICS_EN |
- GDMA_TCS_EN | GDMA_UCS_EN),
- MT7620_GDMA1_FWD_CFG);
-
- /* Enable RX VLan Offloading */
- mtk_w32(eth, 0, MT7621_CDMP_EG_CTRL);
-
- return 0;
-}
-
-static void mt7621_set_mac(struct mtk_mac *mac, unsigned char *hwaddr)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&mac->hw->page_lock, flags);
- if (mac->id == 0) {
- mtk_w32(mac->hw, (hwaddr[0] << 8) | hwaddr[1],
- GSW_REG_GDMA1_MAC_ADRH);
- mtk_w32(mac->hw, (hwaddr[2] << 24) | (hwaddr[3] << 16) |
- (hwaddr[4] << 8) | hwaddr[5],
- GSW_REG_GDMA1_MAC_ADRL);
- }
- if (mac->id == 1) {
- mtk_w32(mac->hw, (hwaddr[0] << 8) | hwaddr[1],
- GSW_REG_GDMA2_MAC_ADRH);
- mtk_w32(mac->hw, (hwaddr[2] << 24) | (hwaddr[3] << 16) |
- (hwaddr[4] << 8) | hwaddr[5],
- GSW_REG_GDMA2_MAC_ADRL);
- }
- spin_unlock_irqrestore(&mac->hw->page_lock, flags);
-}
-
-static struct mtk_soc_data mt7621_data = {
- .hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
- NETIF_F_IPV6_CSUM,
- .dma_type = MTK_PDMA,
- .dma_ring_size = 256,
- .napi_weight = 64,
- .new_stats = 1,
- .padding_64b = 1,
- .rx_2b_offset = 1,
- .rx_sg_dma = 1,
- .has_switch = 1,
- .mac_count = 2,
- .reset_fe = mt7621_mtk_reset,
- .set_mac = mt7621_set_mac,
- .fwd_config = mt7621_fwd_config,
- .switch_init = mtk_gsw_init,
- .reg_table = mt7621_reg_table,
- .pdma_glo_cfg = MTK_PDMA_SIZE_16DWORDS,
- .rx_int = RT5350_RX_DONE_INT,
- .tx_int = RT5350_TX_DONE_INT,
- .status_int = MT7621_MTK_GDM1_AF | MT7621_MTK_GDM2_AF,
- .checksum_bit = MT7621_L4_VALID,
- .has_carrier = mt7620_has_carrier,
- .mdio_read = mt7620_mdio_read,
- .mdio_write = mt7620_mdio_write,
- .mdio_adjust_link = mt7620_mdio_link_adjust,
-};
-
-const struct of_device_id of_mtk_match[] = {
- { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data },
- {},
-};
-
-MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/staging/mt7621-pci/Kconfig b/drivers/staging/mt7621-pci/Kconfig
index d33533872a16..c8fa17cfa807 100644
--- a/drivers/staging/mt7621-pci/Kconfig
+++ b/drivers/staging/mt7621-pci/Kconfig
@@ -1,6 +1,7 @@
config PCI_MT7621
tristate "MediaTek MT7621 PCI Controller"
depends on RALINK
+ depends on PCI
select PCI_DRIVERS_GENERIC
help
This selects a driver for the MediaTek MT7621 PCI Controller.
diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c
index d6248eecf123..2aee64fdaec5 100644
--- a/drivers/staging/octeon/ethernet-mdio.c
+++ b/drivers/staging/octeon/ethernet-mdio.c
@@ -163,7 +163,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev)
goto no_phy;
phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
- PHY_INTERFACE_MODE_GMII);
+ priv->phy_mode);
of_node_put(phy_node);
if (!phydev)
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index ce61c5670ef6..986db76705cc 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -653,14 +653,37 @@ static struct device_node *cvm_oct_node_for_port(struct device_node *pip,
return np;
}
-static void cvm_set_rgmii_delay(struct device_node *np, int iface, int port)
+static void cvm_set_rgmii_delay(struct octeon_ethernet *priv, int iface,
+ int port)
{
+ struct device_node *np = priv->of_node;
u32 delay_value;
+ bool rx_delay;
+ bool tx_delay;
- if (!of_property_read_u32(np, "rx-delay", &delay_value))
+ /* By default, both RX/TX delay is enabled in
+ * __cvmx_helper_rgmii_enable().
+ */
+ rx_delay = true;
+ tx_delay = true;
+
+ if (!of_property_read_u32(np, "rx-delay", &delay_value)) {
cvmx_write_csr(CVMX_ASXX_RX_CLK_SETX(port, iface), delay_value);
- if (!of_property_read_u32(np, "tx-delay", &delay_value))
+ rx_delay = delay_value > 0;
+ }
+ if (!of_property_read_u32(np, "tx-delay", &delay_value)) {
cvmx_write_csr(CVMX_ASXX_TX_CLK_SETX(port, iface), delay_value);
+ tx_delay = delay_value > 0;
+ }
+
+ if (!rx_delay && !tx_delay)
+ priv->phy_mode = PHY_INTERFACE_MODE_RGMII_ID;
+ else if (!rx_delay)
+ priv->phy_mode = PHY_INTERFACE_MODE_RGMII_RXID;
+ else if (!tx_delay)
+ priv->phy_mode = PHY_INTERFACE_MODE_RGMII_TXID;
+ else
+ priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
}
static int cvm_oct_probe(struct platform_device *pdev)
@@ -825,6 +848,7 @@ static int cvm_oct_probe(struct platform_device *pdev)
priv->port = port;
priv->queue = cvmx_pko_get_base_queue(priv->port);
priv->fau = fau - cvmx_pko_get_num_queues(port) * 4;
+ priv->phy_mode = PHY_INTERFACE_MODE_NA;
for (qos = 0; qos < 16; qos++)
skb_queue_head_init(&priv->tx_free_list[qos]);
for (qos = 0; qos < cvmx_pko_get_num_queues(port);
@@ -856,6 +880,7 @@ static int cvm_oct_probe(struct platform_device *pdev)
break;
case CVMX_HELPER_INTERFACE_MODE_SGMII:
+ priv->phy_mode = PHY_INTERFACE_MODE_SGMII;
dev->netdev_ops = &cvm_oct_sgmii_netdev_ops;
strcpy(dev->name, "eth%d");
break;
@@ -865,11 +890,16 @@ static int cvm_oct_probe(struct platform_device *pdev)
strcpy(dev->name, "spi%d");
break;
- case CVMX_HELPER_INTERFACE_MODE_RGMII:
case CVMX_HELPER_INTERFACE_MODE_GMII:
+ priv->phy_mode = PHY_INTERFACE_MODE_GMII;
+ dev->netdev_ops = &cvm_oct_rgmii_netdev_ops;
+ strcpy(dev->name, "eth%d");
+ break;
+
+ case CVMX_HELPER_INTERFACE_MODE_RGMII:
dev->netdev_ops = &cvm_oct_rgmii_netdev_ops;
strcpy(dev->name, "eth%d");
- cvm_set_rgmii_delay(priv->of_node, interface,
+ cvm_set_rgmii_delay(priv, interface,
port_index);
break;
}
diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h
index 4a07e7f43d12..be570d33685a 100644
--- a/drivers/staging/octeon/octeon-ethernet.h
+++ b/drivers/staging/octeon/octeon-ethernet.h
@@ -12,7 +12,7 @@
#define OCTEON_ETHERNET_H
#include <linux/of.h>
-
+#include <linux/phy.h>
#include <asm/octeon/cvmx-helper-board.h>
/**
@@ -33,6 +33,8 @@ struct octeon_ethernet {
* cvmx_helper_interface_mode_t
*/
int imode;
+ /* PHY mode */
+ phy_interface_t phy_mode;
/* List of outstanding tx buffers per queue */
struct sk_buff_head tx_free_list[16];
unsigned int last_speed;
diff --git a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c
index 80b8d4153414..a54286498a47 100644
--- a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c
+++ b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c
@@ -45,7 +45,7 @@ static int dcon_init_xo_1(struct dcon_priv *dcon)
{
unsigned char lob;
int ret, i;
- struct dcon_gpio *pin = &gpios_asis[0];
+ const struct dcon_gpio *pin = &gpios_asis[0];
for (i = 0; i < ARRAY_SIZE(gpios_asis); i++) {
gpios[i] = devm_gpiod_get(&dcon->client->dev, pin[i].name,
diff --git a/drivers/staging/rtl8188eu/core/rtw_xmit.c b/drivers/staging/rtl8188eu/core/rtw_xmit.c
index 1723a47a96b4..952f2ab51347 100644
--- a/drivers/staging/rtl8188eu/core/rtw_xmit.c
+++ b/drivers/staging/rtl8188eu/core/rtw_xmit.c
@@ -174,7 +174,9 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf;
- rtw_alloc_hwxmits(padapter);
+ res = rtw_alloc_hwxmits(padapter);
+ if (res == _FAIL)
+ goto exit;
rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
for (i = 0; i < 4; i++)
@@ -1503,7 +1505,7 @@ exit:
return res;
}
-void rtw_alloc_hwxmits(struct adapter *padapter)
+s32 rtw_alloc_hwxmits(struct adapter *padapter)
{
struct hw_xmit *hwxmits;
struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
@@ -1512,6 +1514,8 @@ void rtw_alloc_hwxmits(struct adapter *padapter)
pxmitpriv->hwxmits = kcalloc(pxmitpriv->hwxmit_entry,
sizeof(struct hw_xmit), GFP_KERNEL);
+ if (!pxmitpriv->hwxmits)
+ return _FAIL;
hwxmits = pxmitpriv->hwxmits;
@@ -1519,6 +1523,7 @@ void rtw_alloc_hwxmits(struct adapter *padapter)
hwxmits[1] .sta_queue = &pxmitpriv->vi_pending;
hwxmits[2] .sta_queue = &pxmitpriv->be_pending;
hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
+ return _SUCCESS;
}
void rtw_free_hwxmits(struct adapter *padapter)
diff --git a/drivers/staging/rtl8188eu/include/rtw_xmit.h b/drivers/staging/rtl8188eu/include/rtw_xmit.h
index 788f59c74ea1..ba7e15fbde72 100644
--- a/drivers/staging/rtl8188eu/include/rtw_xmit.h
+++ b/drivers/staging/rtl8188eu/include/rtw_xmit.h
@@ -336,7 +336,7 @@ s32 rtw_txframes_sta_ac_pending(struct adapter *padapter,
void rtw_init_hwxmits(struct hw_xmit *phwxmit, int entry);
s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter);
void _rtw_free_xmit_priv(struct xmit_priv *pxmitpriv);
-void rtw_alloc_hwxmits(struct adapter *padapter);
+s32 rtw_alloc_hwxmits(struct adapter *padapter);
void rtw_free_hwxmits(struct adapter *padapter);
s32 rtw_xmit(struct adapter *padapter, struct sk_buff **pkt);
diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c
index 1920d02f7c9f..8c36acedf507 100644
--- a/drivers/staging/rtl8712/rtl8712_cmd.c
+++ b/drivers/staging/rtl8712/rtl8712_cmd.c
@@ -147,17 +147,9 @@ static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf)
static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf)
{
- u32 val;
- void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd);
struct cmd_obj *pcmd = (struct cmd_obj *)pbuf;
- if (pcmd->rsp && pcmd->rspsz > 0)
- memcpy(pcmd->rsp, (u8 *)&val, pcmd->rspsz);
- pcmd_callback = cmd_callback[pcmd->cmdcode].callback;
- if (!pcmd_callback)
- r8712_free_cmd_obj(pcmd);
- else
- pcmd_callback(padapter, pcmd);
+ r8712_free_cmd_obj(pcmd);
return H2C_SUCCESS;
}
diff --git a/drivers/staging/rtl8712/rtl8712_cmd.h b/drivers/staging/rtl8712/rtl8712_cmd.h
index 92fb77666d44..1ef86b8c592f 100644
--- a/drivers/staging/rtl8712/rtl8712_cmd.h
+++ b/drivers/staging/rtl8712/rtl8712_cmd.h
@@ -140,7 +140,7 @@ enum rtl8712_h2c_cmd {
static struct _cmd_callback cmd_callback[] = {
{GEN_CMD_CODE(_Read_MACREG), NULL}, /*0*/
{GEN_CMD_CODE(_Write_MACREG), NULL},
- {GEN_CMD_CODE(_Read_BBREG), &r8712_getbbrfreg_cmdrsp_callback},
+ {GEN_CMD_CODE(_Read_BBREG), NULL},
{GEN_CMD_CODE(_Write_BBREG), NULL},
{GEN_CMD_CODE(_Read_RFREG), &r8712_getbbrfreg_cmdrsp_callback},
{GEN_CMD_CODE(_Write_RFREG), NULL}, /*5*/
diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c
index 094d61bcb469..b87f13a0b563 100644
--- a/drivers/staging/rtl8723bs/core/rtw_xmit.c
+++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c
@@ -260,7 +260,9 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
}
}
- rtw_alloc_hwxmits(padapter);
+ res = rtw_alloc_hwxmits(padapter);
+ if (res == _FAIL)
+ goto exit;
rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
for (i = 0; i < 4; i++) {
@@ -2144,7 +2146,7 @@ exit:
return res;
}
-void rtw_alloc_hwxmits(struct adapter *padapter)
+s32 rtw_alloc_hwxmits(struct adapter *padapter)
{
struct hw_xmit *hwxmits;
struct xmit_priv *pxmitpriv = &padapter->xmitpriv;
@@ -2155,10 +2157,8 @@ void rtw_alloc_hwxmits(struct adapter *padapter)
pxmitpriv->hwxmits = rtw_zmalloc(sizeof(struct hw_xmit) * pxmitpriv->hwxmit_entry);
- if (pxmitpriv->hwxmits == NULL) {
- DBG_871X("alloc hwxmits fail!...\n");
- return;
- }
+ if (!pxmitpriv->hwxmits)
+ return _FAIL;
hwxmits = pxmitpriv->hwxmits;
@@ -2204,7 +2204,7 @@ void rtw_alloc_hwxmits(struct adapter *padapter)
}
-
+ return _SUCCESS;
}
void rtw_free_hwxmits(struct adapter *padapter)
diff --git a/drivers/staging/rtl8723bs/include/rtw_xmit.h b/drivers/staging/rtl8723bs/include/rtw_xmit.h
index 1b38b9182b31..37f42b2f22f1 100644
--- a/drivers/staging/rtl8723bs/include/rtw_xmit.h
+++ b/drivers/staging/rtl8723bs/include/rtw_xmit.h
@@ -487,7 +487,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter);
void _rtw_free_xmit_priv (struct xmit_priv *pxmitpriv);
-void rtw_alloc_hwxmits(struct adapter *padapter);
+s32 rtw_alloc_hwxmits(struct adapter *padapter);
void rtw_free_hwxmits(struct adapter *padapter);
diff --git a/drivers/staging/rtlwifi/phydm/rtl_phydm.c b/drivers/staging/rtlwifi/phydm/rtl_phydm.c
index 9930ed954abb..4cc77b2016e1 100644
--- a/drivers/staging/rtlwifi/phydm/rtl_phydm.c
+++ b/drivers/staging/rtlwifi/phydm/rtl_phydm.c
@@ -180,6 +180,8 @@ static int rtl_phydm_init_priv(struct rtl_priv *rtlpriv,
rtlpriv->phydm.internal =
kzalloc(sizeof(struct phy_dm_struct), GFP_KERNEL);
+ if (!rtlpriv->phydm.internal)
+ return 0;
_rtl_phydm_init_com_info(rtlpriv, ic, params);
diff --git a/drivers/staging/rtlwifi/rtl8822be/fw.c b/drivers/staging/rtlwifi/rtl8822be/fw.c
index f061dd1382aa..cf6b7a80b753 100644
--- a/drivers/staging/rtlwifi/rtl8822be/fw.c
+++ b/drivers/staging/rtlwifi/rtl8822be/fw.c
@@ -743,6 +743,8 @@ void rtl8822be_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished)
u1_rsvd_page_loc, 3);
skb = dev_alloc_skb(totalpacketlen);
+ if (!skb)
+ return;
memcpy((u8 *)skb_put(skb, totalpacketlen), &reserved_page_packet,
totalpacketlen);
diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c
index edff6ce85655..9d85a3a1af4c 100644
--- a/drivers/staging/speakup/speakup_soft.c
+++ b/drivers/staging/speakup/speakup_soft.c
@@ -210,12 +210,15 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count,
return -EINVAL;
spin_lock_irqsave(&speakup_info.spinlock, flags);
+ synth_soft.alive = 1;
while (1) {
prepare_to_wait(&speakup_event, &wait, TASK_INTERRUPTIBLE);
- if (!unicode)
- synth_buffer_skip_nonlatin1();
- if (!synth_buffer_empty() || speakup_info.flushing)
- break;
+ if (synth_current() == &synth_soft) {
+ if (!unicode)
+ synth_buffer_skip_nonlatin1();
+ if (!synth_buffer_empty() || speakup_info.flushing)
+ break;
+ }
spin_unlock_irqrestore(&speakup_info.spinlock, flags);
if (fp->f_flags & O_NONBLOCK) {
finish_wait(&speakup_event, &wait);
@@ -235,6 +238,8 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count,
/* Keep 3 bytes available for a 16bit UTF-8-encoded character */
while (chars_sent <= count - bytes_per_ch) {
+ if (synth_current() != &synth_soft)
+ break;
if (speakup_info.flushing) {
speakup_info.flushing = 0;
ch = '\x18';
@@ -331,7 +336,8 @@ static __poll_t softsynth_poll(struct file *fp, struct poll_table_struct *wait)
poll_wait(fp, &speakup_event, wait);
spin_lock_irqsave(&speakup_info.spinlock, flags);
- if (!synth_buffer_empty() || speakup_info.flushing)
+ if (synth_current() == &synth_soft &&
+ (!synth_buffer_empty() || speakup_info.flushing))
ret = EPOLLIN | EPOLLRDNORM;
spin_unlock_irqrestore(&speakup_info.spinlock, flags);
return ret;
diff --git a/drivers/staging/speakup/spk_priv.h b/drivers/staging/speakup/spk_priv.h
index c8e688878fc7..ac6a74883af4 100644
--- a/drivers/staging/speakup/spk_priv.h
+++ b/drivers/staging/speakup/spk_priv.h
@@ -74,6 +74,7 @@ int synth_request_region(unsigned long start, unsigned long n);
int synth_release_region(unsigned long start, unsigned long n);
int synth_add(struct spk_synth *in_synth);
void synth_remove(struct spk_synth *in_synth);
+struct spk_synth *synth_current(void);
extern struct speakup_info_t speakup_info;
diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c
index 25f259ee4ffc..3568bfb89912 100644
--- a/drivers/staging/speakup/synth.c
+++ b/drivers/staging/speakup/synth.c
@@ -481,4 +481,10 @@ void synth_remove(struct spk_synth *in_synth)
}
EXPORT_SYMBOL_GPL(synth_remove);
+struct spk_synth *synth_current(void)
+{
+ return synth;
+}
+EXPORT_SYMBOL_GPL(synth_current);
+
short spk_punc_masks[] = { 0, SOME, MOST, PUNC, PUNC | B_SYM };
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 804daf83be35..064d0db4c51e 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -3513,6 +3513,7 @@ static int vchiq_probe(struct platform_device *pdev)
struct device_node *fw_node;
const struct of_device_id *of_id;
struct vchiq_drvdata *drvdata;
+ struct device *vchiq_dev;
int err;
of_id = of_match_node(vchiq_of_match, pdev->dev.of_node);
@@ -3547,9 +3548,12 @@ static int vchiq_probe(struct platform_device *pdev)
goto failed_platform_init;
}
- if (IS_ERR(device_create(vchiq_class, &pdev->dev, vchiq_devid,
- NULL, "vchiq")))
+ vchiq_dev = device_create(vchiq_class, &pdev->dev, vchiq_devid, NULL,
+ "vchiq");
+ if (IS_ERR(vchiq_dev)) {
+ err = PTR_ERR(vchiq_dev);
goto failed_device_create;
+ }
vchiq_debugfs_init();
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index b370985b58a1..c6bb4aaf9bd0 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1033,8 +1033,6 @@ static void vnt_interrupt_process(struct vnt_private *priv)
return;
}
- MACvIntDisable(priv->PortOffset);
-
spin_lock_irqsave(&priv->lock, flags);
/* Read low level stats */
@@ -1122,8 +1120,6 @@ static void vnt_interrupt_process(struct vnt_private *priv)
}
spin_unlock_irqrestore(&priv->lock, flags);
-
- MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE);
}
static void vnt_interrupt_work(struct work_struct *work)
@@ -1133,14 +1129,17 @@ static void vnt_interrupt_work(struct work_struct *work)
if (priv->vif)
vnt_interrupt_process(priv);
+
+ MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE);
}
static irqreturn_t vnt_interrupt(int irq, void *arg)
{
struct vnt_private *priv = arg;
- if (priv->vif)
- schedule_work(&priv->interrupt_work);
+ schedule_work(&priv->interrupt_work);
+
+ MACvIntDisable(priv->PortOffset);
return IRQ_HANDLED;
}
diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
index db5df3d54818..3bdd56a1021b 100644
--- a/drivers/tty/serial/ar933x_uart.c
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -49,11 +49,6 @@ struct ar933x_uart_port {
struct clk *clk;
};
-static inline bool ar933x_uart_console_enabled(void)
-{
- return IS_ENABLED(CONFIG_SERIAL_AR933X_CONSOLE);
-}
-
static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up,
int offset)
{
@@ -508,6 +503,7 @@ static const struct uart_ops ar933x_uart_ops = {
.verify_port = ar933x_uart_verify_port,
};
+#ifdef CONFIG_SERIAL_AR933X_CONSOLE
static struct ar933x_uart_port *
ar933x_console_ports[CONFIG_SERIAL_AR933X_NR_UARTS];
@@ -604,14 +600,7 @@ static struct console ar933x_uart_console = {
.index = -1,
.data = &ar933x_uart_driver,
};
-
-static void ar933x_uart_add_console_port(struct ar933x_uart_port *up)
-{
- if (!ar933x_uart_console_enabled())
- return;
-
- ar933x_console_ports[up->port.line] = up;
-}
+#endif /* CONFIG_SERIAL_AR933X_CONSOLE */
static struct uart_driver ar933x_uart_driver = {
.owner = THIS_MODULE,
@@ -700,7 +689,9 @@ static int ar933x_uart_probe(struct platform_device *pdev)
baud = ar933x_uart_get_baud(port->uartclk, 0, AR933X_UART_MAX_STEP);
up->max_baud = min_t(unsigned int, baud, AR933X_UART_MAX_BAUD);
- ar933x_uart_add_console_port(up);
+#ifdef CONFIG_SERIAL_AR933X_CONSOLE
+ ar933x_console_ports[up->port.line] = up;
+#endif
ret = uart_add_one_port(&ar933x_uart_driver, &up->port);
if (ret)
@@ -749,8 +740,9 @@ static int __init ar933x_uart_init(void)
{
int ret;
- if (ar933x_uart_console_enabled())
- ar933x_uart_driver.cons = &ar933x_uart_console;
+#ifdef CONFIG_SERIAL_AR933X_CONSOLE
+ ar933x_uart_driver.cons = &ar933x_uart_console;
+#endif
ret = uart_register_driver(&ar933x_uart_driver);
if (ret)
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 05147fe24343..0b4f36905321 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -166,6 +166,8 @@ struct atmel_uart_port {
unsigned int pending_status;
spinlock_t lock_suspended;
+ bool hd_start_rx; /* can start RX during half-duplex operation */
+
/* ISO7816 */
unsigned int fidi_min;
unsigned int fidi_max;
@@ -231,6 +233,13 @@ static inline void atmel_uart_write_char(struct uart_port *port, u8 value)
__raw_writeb(value, port->membase + ATMEL_US_THR);
}
+static inline int atmel_uart_is_half_duplex(struct uart_port *port)
+{
+ return ((port->rs485.flags & SER_RS485_ENABLED) &&
+ !(port->rs485.flags & SER_RS485_RX_DURING_TX)) ||
+ (port->iso7816.flags & SER_ISO7816_ENABLED);
+}
+
#ifdef CONFIG_SERIAL_ATMEL_PDC
static bool atmel_use_pdc_rx(struct uart_port *port)
{
@@ -608,10 +617,9 @@ static void atmel_stop_tx(struct uart_port *port)
/* Disable interrupts */
atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
- if (((port->rs485.flags & SER_RS485_ENABLED) &&
- !(port->rs485.flags & SER_RS485_RX_DURING_TX)) ||
- port->iso7816.flags & SER_ISO7816_ENABLED)
+ if (atmel_uart_is_half_duplex(port))
atmel_start_rx(port);
+
}
/*
@@ -628,9 +636,7 @@ static void atmel_start_tx(struct uart_port *port)
return;
if (atmel_use_pdc_tx(port) || atmel_use_dma_tx(port))
- if (((port->rs485.flags & SER_RS485_ENABLED) &&
- !(port->rs485.flags & SER_RS485_RX_DURING_TX)) ||
- port->iso7816.flags & SER_ISO7816_ENABLED)
+ if (atmel_uart_is_half_duplex(port))
atmel_stop_rx(port);
if (atmel_use_pdc_tx(port))
@@ -928,11 +934,14 @@ static void atmel_complete_tx_dma(void *arg)
*/
if (!uart_circ_empty(xmit))
atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx);
- else if (((port->rs485.flags & SER_RS485_ENABLED) &&
- !(port->rs485.flags & SER_RS485_RX_DURING_TX)) ||
- port->iso7816.flags & SER_ISO7816_ENABLED) {
- /* DMA done, stop TX, start RX for RS485 */
- atmel_start_rx(port);
+ else if (atmel_uart_is_half_duplex(port)) {
+ /*
+ * DMA done, re-enable TXEMPTY and signal that we can stop
+ * TX and start RX for RS485
+ */
+ atmel_port->hd_start_rx = true;
+ atmel_uart_writel(port, ATMEL_US_IER,
+ atmel_port->tx_done_mask);
}
spin_unlock_irqrestore(&port->lock, flags);
@@ -1288,6 +1297,10 @@ static int atmel_prepare_rx_dma(struct uart_port *port)
sg_dma_len(&atmel_port->sg_rx)/2,
DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT);
+ if (!desc) {
+ dev_err(port->dev, "Preparing DMA cyclic failed\n");
+ goto chan_err;
+ }
desc->callback = atmel_complete_rx_dma;
desc->callback_param = port;
atmel_port->desc_rx = desc;
@@ -1376,9 +1389,20 @@ atmel_handle_transmit(struct uart_port *port, unsigned int pending)
struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
if (pending & atmel_port->tx_done_mask) {
- /* Either PDC or interrupt transmission */
atmel_uart_writel(port, ATMEL_US_IDR,
atmel_port->tx_done_mask);
+
+ /* Start RX if flag was set and FIFO is empty */
+ if (atmel_port->hd_start_rx) {
+ if (!(atmel_uart_readl(port, ATMEL_US_CSR)
+ & ATMEL_US_TXEMPTY))
+ dev_warn(port->dev, "Should start RX, but TX fifo is not empty\n");
+
+ atmel_port->hd_start_rx = false;
+ atmel_start_rx(port);
+ return;
+ }
+
atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx);
}
}
@@ -1508,9 +1532,7 @@ static void atmel_tx_pdc(struct uart_port *port)
atmel_uart_writel(port, ATMEL_US_IER,
atmel_port->tx_done_mask);
} else {
- if (((port->rs485.flags & SER_RS485_ENABLED) &&
- !(port->rs485.flags & SER_RS485_RX_DURING_TX)) ||
- port->iso7816.flags & SER_ISO7816_ENABLED) {
+ if (atmel_uart_is_half_duplex(port)) {
/* DMA done, stop TX, start RX for RS485 */
atmel_start_rx(port);
}
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 6fb312e7af71..bfe5e9e034ec 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -148,8 +148,10 @@ static int configure_kgdboc(void)
char *cptr = config;
struct console *cons;
- if (!strlen(config) || isspace(config[0]))
+ if (!strlen(config) || isspace(config[0])) {
+ err = 0;
goto noconfig;
+ }
kgdboc_io_ops.is_console = 0;
kgdb_tty_driver = NULL;
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index f5bdde405627..450ba6d7996c 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1415,6 +1415,8 @@ static int max310x_spi_probe(struct spi_device *spi)
if (spi->dev.of_node) {
const struct of_device_id *of_id =
of_match_device(max310x_dt_ids, &spi->dev);
+ if (!of_id)
+ return -ENODEV;
devtype = (struct max310x_devtype *)of_id->data;
} else {
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 231f751d1ef4..7e7b1559fa36 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -810,6 +810,9 @@ static int mvebu_uart_probe(struct platform_device *pdev)
return -EINVAL;
}
+ if (!match)
+ return -ENODEV;
+
/* Assume that all UART ports have a DT alias or none has */
id = of_alias_get_id(pdev->dev.of_node, "serial");
if (!pdev->dev.of_node || id < 0)
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 27235a526cce..4c188f4079b3 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1686,6 +1686,10 @@ static int mxs_auart_probe(struct platform_device *pdev)
s->port.mapbase = r->start;
s->port.membase = ioremap(r->start, resource_size(r));
+ if (!s->port.membase) {
+ ret = -ENOMEM;
+ goto out_disable_clks;
+ }
s->port.ops = &mxs_auart_ops;
s->port.iotype = UPIO_MEM;
s->port.fifosize = MXS_AUART_FIFO_SIZE;
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 3bcec1c20219..35e5f9c5d5be 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1050,7 +1050,7 @@ static int __init qcom_geni_console_setup(struct console *co, char *options)
{
struct uart_port *uport;
struct qcom_geni_serial_port *port;
- int baud;
+ int baud = 9600;
int bits = 8;
int parity = 'n';
int flow = 'n';
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 635178cf3eed..09a183dfc526 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1507,7 +1507,7 @@ static int __init sc16is7xx_init(void)
ret = i2c_add_driver(&sc16is7xx_i2c_uart_driver);
if (ret < 0) {
pr_err("failed to init sc16is7xx i2c --> %d\n", ret);
- return ret;
+ goto err_i2c;
}
#endif
@@ -1515,10 +1515,18 @@ static int __init sc16is7xx_init(void)
ret = spi_register_driver(&sc16is7xx_spi_uart_driver);
if (ret < 0) {
pr_err("failed to init sc16is7xx spi --> %d\n", ret);
- return ret;
+ goto err_spi;
}
#endif
return ret;
+
+err_spi:
+#ifdef CONFIG_SERIAL_SC16IS7XX_I2C
+ i2c_del_driver(&sc16is7xx_i2c_uart_driver);
+#endif
+err_i2c:
+ uart_unregister_driver(&sc16is7xx_uart);
+ return ret;
}
module_init(sc16is7xx_init);
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 060fcd42b6d5..2d1c626312cd 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -838,19 +838,9 @@ static void sci_transmit_chars(struct uart_port *port)
if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
uart_write_wakeup(port);
- if (uart_circ_empty(xmit)) {
+ if (uart_circ_empty(xmit))
sci_stop_tx(port);
- } else {
- ctrl = serial_port_in(port, SCSCR);
-
- if (port->type != PORT_SCI) {
- serial_port_in(port, SCxSR); /* Dummy read */
- sci_clear_SCxSR(port, SCxSR_TDxE_CLEAR(port));
- }
- ctrl |= SCSCR_TIE;
- serial_port_out(port, SCSCR, ctrl);
- }
}
/* On SH3, SCIF may read end-of-break as a space->mark char */
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 044c3cbdcfa4..a9e12b3bc31d 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -325,7 +325,7 @@ static void tty_port_shutdown(struct tty_port *port, struct tty_struct *tty)
if (tty && C_HUPCL(tty))
tty_port_lower_dtr_rts(port);
- if (port->ops->shutdown)
+ if (port->ops && port->ops->shutdown)
port->ops->shutdown(port);
}
out:
@@ -398,7 +398,7 @@ EXPORT_SYMBOL_GPL(tty_port_tty_wakeup);
*/
int tty_port_carrier_raised(struct tty_port *port)
{
- if (port->ops->carrier_raised == NULL)
+ if (!port->ops || !port->ops->carrier_raised)
return 1;
return port->ops->carrier_raised(port);
}
@@ -414,7 +414,7 @@ EXPORT_SYMBOL(tty_port_carrier_raised);
*/
void tty_port_raise_dtr_rts(struct tty_port *port)
{
- if (port->ops->dtr_rts)
+ if (port->ops && port->ops->dtr_rts)
port->ops->dtr_rts(port, 1);
}
EXPORT_SYMBOL(tty_port_raise_dtr_rts);
@@ -429,7 +429,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts);
*/
void tty_port_lower_dtr_rts(struct tty_port *port)
{
- if (port->ops->dtr_rts)
+ if (port->ops && port->ops->dtr_rts)
port->ops->dtr_rts(port, 0);
}
EXPORT_SYMBOL(tty_port_lower_dtr_rts);
@@ -684,7 +684,7 @@ int tty_port_open(struct tty_port *port, struct tty_struct *tty,
if (!tty_port_initialized(port)) {
clear_bit(TTY_IO_ERROR, &tty->flags);
- if (port->ops->activate) {
+ if (port->ops && port->ops->activate) {
int retval = port->ops->activate(port, tty);
if (retval) {
mutex_unlock(&port->mutex);
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 739f8960811a..ec666eb4b7b4 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -558,10 +558,8 @@ static void acm_softint(struct work_struct *work)
clear_bit(EVENT_RX_STALL, &acm->flags);
}
- if (test_bit(EVENT_TTY_WAKEUP, &acm->flags)) {
+ if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
tty_port_tty_wakeup(&acm->port);
- clear_bit(EVENT_TTY_WAKEUP, &acm->flags);
- }
}
/*
diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c
index 48277bbc15e4..73c8e6591746 100644
--- a/drivers/usb/common/common.c
+++ b/drivers/usb/common/common.c
@@ -145,6 +145,8 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0)
do {
controller = of_find_node_with_property(controller, "phys");
+ if (!of_device_is_available(controller))
+ continue;
index = 0;
do {
if (arg0 == -1) {
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 3189181bb628..975d7c1288e3 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2742,6 +2742,9 @@ int usb_add_hcd(struct usb_hcd *hcd,
retval = usb_phy_roothub_set_mode(hcd->phy_roothub,
PHY_MODE_USB_HOST_SS);
if (retval)
+ retval = usb_phy_roothub_set_mode(hcd->phy_roothub,
+ PHY_MODE_USB_HOST);
+ if (retval)
goto err_usb_phy_roothub_power_on;
retval = usb_phy_roothub_power_on(hcd->phy_roothub);
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index fdc6e4e403e8..8cced3609e24 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -29,6 +29,7 @@
#define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa
#define PCI_DEVICE_ID_INTEL_APL 0x5aaa
#define PCI_DEVICE_ID_INTEL_KBP 0xa2b0
+#define PCI_DEVICE_ID_INTEL_CMLH 0x02ee
#define PCI_DEVICE_ID_INTEL_GLK 0x31aa
#define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee
#define PCI_DEVICE_ID_INTEL_CNPH 0xa36e
@@ -305,6 +306,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
(kernel_ulong_t) &dwc3_pci_mrfld_properties, },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH),
+ (kernel_ulong_t) &dwc3_pci_intel_properties, },
+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTLP),
(kernel_ulong_t) &dwc3_pci_intel_properties, },
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 75b113a5b25c..f3816a5c861e 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -391,20 +391,20 @@ try_again:
req->complete = f_hidg_req_complete;
req->context = hidg;
+ spin_unlock_irqrestore(&hidg->write_spinlock, flags);
+
status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC);
if (status < 0) {
ERROR(hidg->func.config->cdev,
"usb_ep_queue error on int endpoint %zd\n", status);
- goto release_write_pending_unlocked;
+ goto release_write_pending;
} else {
status = count;
}
- spin_unlock_irqrestore(&hidg->write_spinlock, flags);
return status;
release_write_pending:
spin_lock_irqsave(&hidg->write_spinlock, flags);
-release_write_pending_unlocked:
hidg->write_pending = 0;
spin_unlock_irqrestore(&hidg->write_spinlock, flags);
diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c
index b77f3126580e..c2011cd7df8c 100644
--- a/drivers/usb/gadget/udc/net2272.c
+++ b/drivers/usb/gadget/udc/net2272.c
@@ -945,6 +945,7 @@ net2272_dequeue(struct usb_ep *_ep, struct usb_request *_req)
break;
}
if (&req->req != _req) {
+ ep->stopped = stopped;
spin_unlock_irqrestore(&ep->dev->lock, flags);
return -EINVAL;
}
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index f63f82450bf4..898339e5df10 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -866,9 +866,6 @@ static void start_queue(struct net2280_ep *ep, u32 dmactl, u32 td_dma)
(void) readl(&ep->dev->pci->pcimstctl);
writel(BIT(DMA_START), &dma->dmastat);
-
- if (!ep->is_in)
- stop_out_naking(ep);
}
static void start_dma(struct net2280_ep *ep, struct net2280_request *req)
@@ -907,6 +904,7 @@ static void start_dma(struct net2280_ep *ep, struct net2280_request *req)
writel(BIT(DMA_START), &dma->dmastat);
return;
}
+ stop_out_naking(ep);
}
tmp = dmactl_default;
@@ -1275,9 +1273,9 @@ static int net2280_dequeue(struct usb_ep *_ep, struct usb_request *_req)
break;
}
if (&req->req != _req) {
+ ep->stopped = stopped;
spin_unlock_irqrestore(&ep->dev->lock, flags);
- dev_err(&ep->dev->pdev->dev, "%s: Request mismatch\n",
- __func__);
+ ep_dbg(ep->dev, "%s: Request mismatch\n", __func__);
return -EINVAL;
}
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 934584f0a20a..6343fbacd244 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -3204,6 +3204,9 @@ static int __init u132_hcd_init(void)
printk(KERN_INFO "driver %s\n", hcd_name);
workqueue = create_singlethread_workqueue("u132");
retval = platform_driver_register(&u132_platform_driver);
+ if (retval)
+ destroy_workqueue(workqueue);
+
return retval;
}
diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
index c78be578abb0..d932cc31711e 100644
--- a/drivers/usb/host/xhci-dbgcap.c
+++ b/drivers/usb/host/xhci-dbgcap.c
@@ -516,7 +516,6 @@ static int xhci_do_dbc_stop(struct xhci_hcd *xhci)
return -1;
writel(0, &dbc->regs->control);
- xhci_dbc_mem_cleanup(xhci);
dbc->state = DS_DISABLED;
return 0;
@@ -562,8 +561,10 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci)
ret = xhci_do_dbc_stop(xhci);
spin_unlock_irqrestore(&dbc->lock, flags);
- if (!ret)
+ if (!ret) {
+ xhci_dbc_mem_cleanup(xhci);
pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller);
+ }
}
static void
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index e2eece693655..96a740543183 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1545,20 +1545,25 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
port_index = max_ports;
while (port_index--) {
u32 t1, t2;
-
+ int retries = 10;
+retry:
t1 = readl(ports[port_index]->addr);
t2 = xhci_port_state_to_neutral(t1);
portsc_buf[port_index] = 0;
- /* Bail out if a USB3 port has a new device in link training */
- if ((hcd->speed >= HCD_USB3) &&
+ /*
+ * Give a USB3 port in link training time to finish, but don't
+ * prevent suspend as port might be stuck
+ */
+ if ((hcd->speed >= HCD_USB3) && retries-- &&
(t1 & PORT_PLS_MASK) == XDEV_POLLING) {
- bus_state->bus_suspended = 0;
spin_unlock_irqrestore(&xhci->lock, flags);
- xhci_dbg(xhci, "Bus suspend bailout, port in polling\n");
- return -EBUSY;
+ msleep(XHCI_PORT_POLLING_LFPS_TIME);
+ spin_lock_irqsave(&xhci->lock, flags);
+ xhci_dbg(xhci, "port %d polling in bus suspend, waiting\n",
+ port_index);
+ goto retry;
}
-
/* suspend ports in U0, or bail out for new connect changes */
if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) {
if ((t1 & PORT_CSC) && wake_enabled) {
diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c
index a6e463715779..671bce18782c 100644
--- a/drivers/usb/host/xhci-rcar.c
+++ b/drivers/usb/host/xhci-rcar.c
@@ -246,6 +246,7 @@ int xhci_rcar_init_quirk(struct usb_hcd *hcd)
if (!xhci_rcar_wait_for_pll_active(hcd))
return -ETIMEDOUT;
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
return xhci_rcar_download_firmware(hcd);
}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 40fa25c4d041..9215a28dad40 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1647,10 +1647,13 @@ static void handle_port_status(struct xhci_hcd *xhci,
}
}
- if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_U0 &&
- DEV_SUPERSPEED_ANY(portsc)) {
+ if ((portsc & PORT_PLC) &&
+ DEV_SUPERSPEED_ANY(portsc) &&
+ ((portsc & PORT_PLS_MASK) == XDEV_U0 ||
+ (portsc & PORT_PLS_MASK) == XDEV_U1 ||
+ (portsc & PORT_PLS_MASK) == XDEV_U2)) {
xhci_dbg(xhci, "resume SS port %d finished\n", port_id);
- /* We've just brought the device into U0 through either the
+ /* We've just brought the device into U0/1/2 through either the
* Resume state after a device remote wakeup, or through the
* U3Exit state after a host-initiated resume. If it's a device
* initiated remote wake, don't pass up the link state change,
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 652dc36e3012..9334cdee382a 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -452,6 +452,14 @@ struct xhci_op_regs {
*/
#define XHCI_DEFAULT_BESL 4
+/*
+ * USB3 specification define a 360ms tPollingLFPSTiemout for USB3 ports
+ * to complete link training. usually link trainig completes much faster
+ * so check status 10 times with 36ms sleep in places we need to wait for
+ * polling to complete.
+ */
+#define XHCI_PORT_POLLING_LFPS_TIME 36
+
/**
* struct xhci_intr_reg - Interrupt Register Set
* @irq_pending: IMAN - Interrupt Management Register. Used to enable
diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c
index 4d72b7d1d383..04684849d683 100644
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -547,7 +547,7 @@ static int usb251xb_get_ofdata(struct usb251xb *hub,
*/
hub->port_swap = USB251XB_DEF_PORT_SWAP;
of_property_for_each_u32(np, "swap-dx-lanes", prop, p, port) {
- if ((port >= 0) && (port <= data->port_cnt))
+ if (port <= data->port_cnt)
hub->port_swap |= BIT(port);
}
@@ -612,7 +612,7 @@ static int usb251xb_probe(struct usb251xb *hub)
dev);
int err;
- if (np) {
+ if (np && of_id) {
err = usb251xb_get_ofdata(hub,
(struct usb251xb_data *)of_id->data);
if (err) {
diff --git a/drivers/usb/mtu3/Kconfig b/drivers/usb/mtu3/Kconfig
index bcc23486c4ed..928c2cd6fc00 100644
--- a/drivers/usb/mtu3/Kconfig
+++ b/drivers/usb/mtu3/Kconfig
@@ -6,6 +6,7 @@ config USB_MTU3
tristate "MediaTek USB3 Dual Role controller"
depends on USB || USB_GADGET
depends on ARCH_MEDIATEK || COMPILE_TEST
+ depends on EXTCON || !EXTCON
select USB_XHCI_MTK if USB_SUPPORT && USB_XHCI_HCD
help
Say Y or M here if your system runs on MediaTek SoCs with
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index fffe23ab0189..979bef9bfb6b 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -80,6 +80,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */
{ USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
{ USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */
+ { USB_DEVICE(0x10C4, 0x8056) }, /* Lorenz Messtechnik devices */
{ USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */
{ USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */
{ USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 8f5b17471759..1d8461ae2c34 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -609,6 +609,8 @@ static const struct usb_device_id id_table_combined[] = {
.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
{ USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID),
.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+ { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index b863bedb55a1..5755f0df0025 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -567,7 +567,9 @@
/*
* NovaTech product ids (FTDI_VID)
*/
-#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */
+#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */
+#define FTDI_NT_ORIONLX_PLUS_PID 0x7c91 /* OrionLX+ Substation Automation Platform */
+#define FTDI_NT_ORION_IO_PID 0x7c92 /* Orion I/O */
/*
* Synapse Wireless product ids (FTDI_VID)
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index fc52ac75fbf6..18110225d506 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -366,8 +366,6 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport,
if (!urbtrack)
return -ENOMEM;
- kref_get(&mos_parport->ref_count);
- urbtrack->mos_parport = mos_parport;
urbtrack->urb = usb_alloc_urb(0, GFP_ATOMIC);
if (!urbtrack->urb) {
kfree(urbtrack);
@@ -388,6 +386,8 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport,
usb_sndctrlpipe(usbdev, 0),
(unsigned char *)urbtrack->setup,
NULL, 0, async_complete, urbtrack);
+ kref_get(&mos_parport->ref_count);
+ urbtrack->mos_parport = mos_parport;
kref_init(&urbtrack->ref_count);
INIT_LIST_HEAD(&urbtrack->urblist_entry);
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 11b21d9410f3..83869065b802 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -246,6 +246,7 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_EC25 0x0125
#define QUECTEL_PRODUCT_BG96 0x0296
#define QUECTEL_PRODUCT_EP06 0x0306
+#define QUECTEL_PRODUCT_EM12 0x0512
#define CMOTECH_VENDOR_ID 0x16d8
#define CMOTECH_PRODUCT_6001 0x6001
@@ -1066,7 +1067,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3) },
{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
- { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000), /* SIMCom SIM5218 */
+ .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) | NCTRL(3) | RSVD(4) },
/* Quectel products using Qualcomm vendor ID */
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
@@ -1087,6 +1089,9 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff),
+ .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1940,10 +1945,12 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */
.driver_info = RSVD(4) },
- { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
- { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
- { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
- { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
+ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */
+ .driver_info = RSVD(4) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */
{ USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
{ USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) },
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 0f62db091d8d..a2233d72ae7c 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -37,6 +37,7 @@
S(SRC_ATTACHED), \
S(SRC_STARTUP), \
S(SRC_SEND_CAPABILITIES), \
+ S(SRC_SEND_CAPABILITIES_TIMEOUT), \
S(SRC_NEGOTIATE_CAPABILITIES), \
S(SRC_TRANSITION_SUPPLY), \
S(SRC_READY), \
@@ -2966,10 +2967,34 @@ static void run_state_machine(struct tcpm_port *port)
/* port->hard_reset_count = 0; */
port->caps_count = 0;
port->pd_capable = true;
- tcpm_set_state_cond(port, hard_reset_state(port),
+ tcpm_set_state_cond(port, SRC_SEND_CAPABILITIES_TIMEOUT,
PD_T_SEND_SOURCE_CAP);
}
break;
+ case SRC_SEND_CAPABILITIES_TIMEOUT:
+ /*
+ * Error recovery for a PD_DATA_SOURCE_CAP reply timeout.
+ *
+ * PD 2.0 sinks are supposed to accept src-capabilities with a
+ * 3.0 header and simply ignore any src PDOs which the sink does
+ * not understand such as PPS but some 2.0 sinks instead ignore
+ * the entire PD_DATA_SOURCE_CAP message, causing contract
+ * negotiation to fail.
+ *
+ * After PD_N_HARD_RESET_COUNT hard-reset attempts, we try
+ * sending src-capabilities with a lower PD revision to
+ * make these broken sinks work.
+ */
+ if (port->hard_reset_count < PD_N_HARD_RESET_COUNT) {
+ tcpm_set_state(port, HARD_RESET_SEND, 0);
+ } else if (port->negotiated_rev > PD_REV20) {
+ port->negotiated_rev--;
+ port->hard_reset_count = 0;
+ tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
+ } else {
+ tcpm_set_state(port, hard_reset_state(port), 0);
+ }
+ break;
case SRC_NEGOTIATE_CAPABILITIES:
ret = tcpm_pd_check_request(port);
if (ret < 0) {
diff --git a/drivers/usb/typec/tcpm/wcove.c b/drivers/usb/typec/tcpm/wcove.c
index 423208e19383..6770afd40765 100644
--- a/drivers/usb/typec/tcpm/wcove.c
+++ b/drivers/usb/typec/tcpm/wcove.c
@@ -615,8 +615,13 @@ static int wcove_typec_probe(struct platform_device *pdev)
wcove->dev = &pdev->dev;
wcove->regmap = pmic->regmap;
- irq = regmap_irq_get_virq(pmic->irq_chip_data_chgr,
- platform_get_irq(pdev, 0));
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq);
+ return irq;
+ }
+
+ irq = regmap_irq_get_virq(pmic->irq_chip_data_chgr, irq);
if (irq < 0)
return irq;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a25659b5a5d1..3fa20e95a6bb 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1661,11 +1661,11 @@ static void __init vfio_pci_fill_ids(void)
rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
subvendor, subdevice, class, class_mask, 0);
if (rc)
- pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
+ pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
vendor, device, subvendor, subdevice,
class, class_mask, rc);
else
- pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
+ pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
vendor, device, subvendor, subdevice,
class, class_mask);
}
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 8dbb270998f4..6b64e45a5269 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1398,7 +1398,7 @@ unlock_exit:
mutex_unlock(&container->lock);
}
-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
.name = "iommu-vfio-powerpc",
.owner = THIS_MODULE,
.open = tce_iommu_open,
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 73652e21efec..d0f731c9920a 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -58,12 +58,18 @@ module_param_named(disable_hugepages,
MODULE_PARM_DESC(disable_hugepages,
"Disable VFIO IOMMU support for IOMMU hugepages.");
+static unsigned int dma_entry_limit __read_mostly = U16_MAX;
+module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
+MODULE_PARM_DESC(dma_entry_limit,
+ "Maximum number of user DMA mappings per container (65535).");
+
struct vfio_iommu {
struct list_head domain_list;
struct vfio_domain *external_domain; /* domain for external user */
struct mutex lock;
struct rb_root dma_list;
struct blocking_notifier_head notifier;
+ unsigned int dma_avail;
bool v2;
bool nesting;
};
@@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
vfio_unlink_dma(iommu, dma);
put_task_struct(dma->task);
kfree(dma);
+ iommu->dma_avail++;
}
static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
@@ -1081,12 +1088,18 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
goto out_unlock;
}
+ if (!iommu->dma_avail) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
ret = -ENOMEM;
goto out_unlock;
}
+ iommu->dma_avail--;
dma->iova = iova;
dma->vaddr = vaddr;
dma->prot = prot;
@@ -1583,6 +1596,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
INIT_LIST_HEAD(&iommu->domain_list);
iommu->dma_list = RB_ROOT;
+ iommu->dma_avail = dma_entry_limit;
mutex_init(&iommu->lock);
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c
index df7d09409efe..8ca333f21292 100644
--- a/drivers/virt/vboxguest/vboxguest_core.c
+++ b/drivers/virt/vboxguest/vboxguest_core.c
@@ -27,6 +27,10 @@
#define GUEST_MAPPINGS_TRIES 5
+#define VBG_KERNEL_REQUEST \
+ (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV | \
+ VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN)
+
/**
* Reserves memory in which the VMM can relocate any guest mappings
* that are floating around.
@@ -48,7 +52,8 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev)
int i, rc;
/* Query the required space. */
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO,
+ VBG_KERNEL_REQUEST);
if (!req)
return;
@@ -135,7 +140,8 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev)
* Tell the host that we're going to free the memory we reserved for
* it, the free it up. (Leak the memory if anything goes wrong here.)
*/
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO,
+ VBG_KERNEL_REQUEST);
if (!req)
return;
@@ -172,8 +178,10 @@ static int vbg_report_guest_info(struct vbg_dev *gdev)
struct vmmdev_guest_info2 *req2 = NULL;
int rc, ret = -ENOMEM;
- req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO);
- req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2);
+ req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO,
+ VBG_KERNEL_REQUEST);
+ req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2,
+ VBG_KERNEL_REQUEST);
if (!req1 || !req2)
goto out_free;
@@ -187,8 +195,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev)
req2->additions_minor = VBG_VERSION_MINOR;
req2->additions_build = VBG_VERSION_BUILD;
req2->additions_revision = VBG_SVN_REV;
- /* (no features defined yet) */
- req2->additions_features = 0;
+ req2->additions_features =
+ VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO;
strlcpy(req2->name, VBG_VERSION_STRING,
sizeof(req2->name));
@@ -230,7 +238,8 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active)
struct vmmdev_guest_status *req;
int rc;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS,
+ VBG_KERNEL_REQUEST);
if (!req)
return -ENOMEM;
@@ -423,7 +432,8 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled)
struct vmmdev_heartbeat *req;
int rc;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE,
+ VBG_KERNEL_REQUEST);
if (!req)
return -ENOMEM;
@@ -457,7 +467,8 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev)
gdev->guest_heartbeat_req = vbg_req_alloc(
sizeof(*gdev->guest_heartbeat_req),
- VMMDEVREQ_GUEST_HEARTBEAT);
+ VMMDEVREQ_GUEST_HEARTBEAT,
+ VBG_KERNEL_REQUEST);
if (!gdev->guest_heartbeat_req)
return -ENOMEM;
@@ -528,7 +539,8 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev,
struct vmmdev_mask *req;
int rc;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK,
+ VBG_KERNEL_REQUEST);
if (!req)
return -ENOMEM;
@@ -567,8 +579,14 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev,
u32 changed, previous;
int rc, ret = 0;
- /* Allocate a request buffer before taking the spinlock */
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK);
+ /*
+ * Allocate a request buffer before taking the spinlock, when
+ * the session is being terminated the requestor is the kernel,
+ * as we're cleaning up.
+ */
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK,
+ session_termination ? VBG_KERNEL_REQUEST :
+ session->requestor);
if (!req) {
if (!session_termination)
return -ENOMEM;
@@ -627,7 +645,8 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev)
struct vmmdev_mask *req;
int rc;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES,
+ VBG_KERNEL_REQUEST);
if (!req)
return -ENOMEM;
@@ -662,8 +681,14 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev,
u32 changed, previous;
int rc, ret = 0;
- /* Allocate a request buffer before taking the spinlock */
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES);
+ /*
+ * Allocate a request buffer before taking the spinlock, when
+ * the session is being terminated the requestor is the kernel,
+ * as we're cleaning up.
+ */
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES,
+ session_termination ? VBG_KERNEL_REQUEST :
+ session->requestor);
if (!req) {
if (!session_termination)
return -ENOMEM;
@@ -722,7 +747,8 @@ static int vbg_query_host_version(struct vbg_dev *gdev)
struct vmmdev_host_version *req;
int rc, ret;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION,
+ VBG_KERNEL_REQUEST);
if (!req)
return -ENOMEM;
@@ -783,19 +809,24 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events)
gdev->mem_balloon.get_req =
vbg_req_alloc(sizeof(*gdev->mem_balloon.get_req),
- VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ);
+ VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ,
+ VBG_KERNEL_REQUEST);
gdev->mem_balloon.change_req =
vbg_req_alloc(sizeof(*gdev->mem_balloon.change_req),
- VMMDEVREQ_CHANGE_MEMBALLOON);
+ VMMDEVREQ_CHANGE_MEMBALLOON,
+ VBG_KERNEL_REQUEST);
gdev->cancel_req =
vbg_req_alloc(sizeof(*(gdev->cancel_req)),
- VMMDEVREQ_HGCM_CANCEL2);
+ VMMDEVREQ_HGCM_CANCEL2,
+ VBG_KERNEL_REQUEST);
gdev->ack_events_req =
vbg_req_alloc(sizeof(*gdev->ack_events_req),
- VMMDEVREQ_ACKNOWLEDGE_EVENTS);
+ VMMDEVREQ_ACKNOWLEDGE_EVENTS,
+ VBG_KERNEL_REQUEST);
gdev->mouse_status_req =
vbg_req_alloc(sizeof(*gdev->mouse_status_req),
- VMMDEVREQ_GET_MOUSE_STATUS);
+ VMMDEVREQ_GET_MOUSE_STATUS,
+ VBG_KERNEL_REQUEST);
if (!gdev->mem_balloon.get_req || !gdev->mem_balloon.change_req ||
!gdev->cancel_req || !gdev->ack_events_req ||
@@ -892,9 +923,9 @@ void vbg_core_exit(struct vbg_dev *gdev)
* vboxguest_linux.c calls this when userspace opens the char-device.
* Return: A pointer to the new session or an ERR_PTR on error.
* @gdev: The Guest extension device.
- * @user: Set if this is a session for the vboxuser device.
+ * @requestor: VMMDEV_REQUESTOR_* flags
*/
-struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user)
+struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor)
{
struct vbg_session *session;
@@ -903,7 +934,7 @@ struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user)
return ERR_PTR(-ENOMEM);
session->gdev = gdev;
- session->user_session = user;
+ session->requestor = requestor;
return session;
}
@@ -924,7 +955,9 @@ void vbg_core_close_session(struct vbg_session *session)
if (!session->hgcm_client_ids[i])
continue;
- vbg_hgcm_disconnect(gdev, session->hgcm_client_ids[i], &rc);
+ /* requestor is kernel here, as we're cleaning up. */
+ vbg_hgcm_disconnect(gdev, VBG_KERNEL_REQUEST,
+ session->hgcm_client_ids[i], &rc);
}
kfree(session);
@@ -1152,7 +1185,8 @@ static int vbg_req_allowed(struct vbg_dev *gdev, struct vbg_session *session,
return -EPERM;
}
- if (trusted_apps_only && session->user_session) {
+ if (trusted_apps_only &&
+ (session->requestor & VMMDEV_REQUESTOR_USER_DEVICE)) {
vbg_err("Denying userspace vmm call type %#08x through vboxuser device node\n",
req->request_type);
return -EPERM;
@@ -1209,8 +1243,8 @@ static int vbg_ioctl_hgcm_connect(struct vbg_dev *gdev,
if (i >= ARRAY_SIZE(session->hgcm_client_ids))
return -EMFILE;
- ret = vbg_hgcm_connect(gdev, &conn->u.in.loc, &client_id,
- &conn->hdr.rc);
+ ret = vbg_hgcm_connect(gdev, session->requestor, &conn->u.in.loc,
+ &client_id, &conn->hdr.rc);
mutex_lock(&gdev->session_mutex);
if (ret == 0 && conn->hdr.rc >= 0) {
@@ -1251,7 +1285,8 @@ static int vbg_ioctl_hgcm_disconnect(struct vbg_dev *gdev,
if (i >= ARRAY_SIZE(session->hgcm_client_ids))
return -EINVAL;
- ret = vbg_hgcm_disconnect(gdev, client_id, &disconn->hdr.rc);
+ ret = vbg_hgcm_disconnect(gdev, session->requestor, client_id,
+ &disconn->hdr.rc);
mutex_lock(&gdev->session_mutex);
if (ret == 0 && disconn->hdr.rc >= 0)
@@ -1313,12 +1348,12 @@ static int vbg_ioctl_hgcm_call(struct vbg_dev *gdev,
}
if (IS_ENABLED(CONFIG_COMPAT) && f32bit)
- ret = vbg_hgcm_call32(gdev, client_id,
+ ret = vbg_hgcm_call32(gdev, session->requestor, client_id,
call->function, call->timeout_ms,
VBG_IOCTL_HGCM_CALL_PARMS32(call),
call->parm_count, &call->hdr.rc);
else
- ret = vbg_hgcm_call(gdev, client_id,
+ ret = vbg_hgcm_call(gdev, session->requestor, client_id,
call->function, call->timeout_ms,
VBG_IOCTL_HGCM_CALL_PARMS(call),
call->parm_count, &call->hdr.rc);
@@ -1408,6 +1443,7 @@ static int vbg_ioctl_check_balloon(struct vbg_dev *gdev,
}
static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev,
+ struct vbg_session *session,
struct vbg_ioctl_write_coredump *dump)
{
struct vmmdev_write_core_dump *req;
@@ -1415,7 +1451,8 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev,
if (vbg_ioctl_chk(&dump->hdr, sizeof(dump->u.in), 0))
return -EINVAL;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP,
+ session->requestor);
if (!req)
return -ENOMEM;
@@ -1476,7 +1513,7 @@ int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data)
case VBG_IOCTL_CHECK_BALLOON:
return vbg_ioctl_check_balloon(gdev, data);
case VBG_IOCTL_WRITE_CORE_DUMP:
- return vbg_ioctl_write_core_dump(gdev, data);
+ return vbg_ioctl_write_core_dump(gdev, session, data);
}
/* Variable sized requests. */
@@ -1508,7 +1545,8 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features)
struct vmmdev_mouse_status *req;
int rc;
- req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS);
+ req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS,
+ VBG_KERNEL_REQUEST);
if (!req)
return -ENOMEM;
diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h
index 7ad9ec45bfa9..4188c12b839f 100644
--- a/drivers/virt/vboxguest/vboxguest_core.h
+++ b/drivers/virt/vboxguest/vboxguest_core.h
@@ -154,15 +154,15 @@ struct vbg_session {
* host. Protected by vbg_gdev.session_mutex.
*/
u32 guest_caps;
- /** Does this session belong to a root process or a user one? */
- bool user_session;
+ /** VMMDEV_REQUESTOR_* flags */
+ u32 requestor;
/** Set on CANCEL_ALL_WAITEVENTS, protected by vbg_devevent_spinlock. */
bool cancel_waiters;
};
int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events);
void vbg_core_exit(struct vbg_dev *gdev);
-struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user);
+struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor);
void vbg_core_close_session(struct vbg_session *session);
int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data);
int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features);
@@ -172,12 +172,13 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id);
void vbg_linux_mouse_event(struct vbg_dev *gdev);
/* Private (non exported) functions form vboxguest_utils.c */
-void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type);
+void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type,
+ u32 requestor);
void vbg_req_free(void *req, size_t len);
int vbg_req_perform(struct vbg_dev *gdev, void *req);
int vbg_hgcm_call32(
- struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
- struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
- int *vbox_status);
+ struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function,
+ u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32,
+ u32 parm_count, int *vbox_status);
#endif
diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c
index 6e2a9619192d..6e8c0f1c1056 100644
--- a/drivers/virt/vboxguest/vboxguest_linux.c
+++ b/drivers/virt/vboxguest/vboxguest_linux.c
@@ -5,6 +5,7 @@
* Copyright (C) 2006-2016 Oracle Corporation
*/
+#include <linux/cred.h>
#include <linux/input.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
@@ -28,6 +29,23 @@ static DEFINE_MUTEX(vbg_gdev_mutex);
/** Global vbg_gdev pointer used by vbg_get/put_gdev. */
static struct vbg_dev *vbg_gdev;
+static u32 vbg_misc_device_requestor(struct inode *inode)
+{
+ u32 requestor = VMMDEV_REQUESTOR_USERMODE |
+ VMMDEV_REQUESTOR_CON_DONT_KNOW |
+ VMMDEV_REQUESTOR_TRUST_NOT_GIVEN;
+
+ if (from_kuid(current_user_ns(), current->cred->uid) == 0)
+ requestor |= VMMDEV_REQUESTOR_USR_ROOT;
+ else
+ requestor |= VMMDEV_REQUESTOR_USR_USER;
+
+ if (in_egroup_p(inode->i_gid))
+ requestor |= VMMDEV_REQUESTOR_GRP_VBOX;
+
+ return requestor;
+}
+
static int vbg_misc_device_open(struct inode *inode, struct file *filp)
{
struct vbg_session *session;
@@ -36,7 +54,7 @@ static int vbg_misc_device_open(struct inode *inode, struct file *filp)
/* misc_open sets filp->private_data to our misc device */
gdev = container_of(filp->private_data, struct vbg_dev, misc_device);
- session = vbg_core_open_session(gdev, false);
+ session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode));
if (IS_ERR(session))
return PTR_ERR(session);
@@ -53,7 +71,8 @@ static int vbg_misc_device_user_open(struct inode *inode, struct file *filp)
gdev = container_of(filp->private_data, struct vbg_dev,
misc_device_user);
- session = vbg_core_open_session(gdev, false);
+ session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode) |
+ VMMDEV_REQUESTOR_USER_DEVICE);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -115,7 +134,8 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req,
req == VBG_IOCTL_VMMDEV_REQUEST_BIG;
if (is_vmmdev_req)
- buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT);
+ buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT,
+ session->requestor);
else
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c
index bf4474214b4d..75fd140b02ff 100644
--- a/drivers/virt/vboxguest/vboxguest_utils.c
+++ b/drivers/virt/vboxguest/vboxguest_utils.c
@@ -62,7 +62,8 @@ VBG_LOG(vbg_err, pr_err);
VBG_LOG(vbg_debug, pr_debug);
#endif
-void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type)
+void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type,
+ u32 requestor)
{
struct vmmdev_request_header *req;
int order = get_order(PAGE_ALIGN(len));
@@ -78,7 +79,7 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type)
req->request_type = req_type;
req->rc = VERR_GENERAL_FAILURE;
req->reserved1 = 0;
- req->reserved2 = 0;
+ req->requestor = requestor;
return req;
}
@@ -119,7 +120,7 @@ static bool hgcm_req_done(struct vbg_dev *gdev,
return done;
}
-int vbg_hgcm_connect(struct vbg_dev *gdev,
+int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor,
struct vmmdev_hgcm_service_location *loc,
u32 *client_id, int *vbox_status)
{
@@ -127,7 +128,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev,
int rc;
hgcm_connect = vbg_req_alloc(sizeof(*hgcm_connect),
- VMMDEVREQ_HGCM_CONNECT);
+ VMMDEVREQ_HGCM_CONNECT, requestor);
if (!hgcm_connect)
return -ENOMEM;
@@ -153,13 +154,15 @@ int vbg_hgcm_connect(struct vbg_dev *gdev,
}
EXPORT_SYMBOL(vbg_hgcm_connect);
-int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status)
+int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor,
+ u32 client_id, int *vbox_status)
{
struct vmmdev_hgcm_disconnect *hgcm_disconnect = NULL;
int rc;
hgcm_disconnect = vbg_req_alloc(sizeof(*hgcm_disconnect),
- VMMDEVREQ_HGCM_DISCONNECT);
+ VMMDEVREQ_HGCM_DISCONNECT,
+ requestor);
if (!hgcm_disconnect)
return -ENOMEM;
@@ -593,9 +596,10 @@ static int hgcm_call_copy_back_result(
return 0;
}
-int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
- u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms,
- u32 parm_count, int *vbox_status)
+int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id,
+ u32 function, u32 timeout_ms,
+ struct vmmdev_hgcm_function_parameter *parms, u32 parm_count,
+ int *vbox_status)
{
struct vmmdev_hgcm_call *call;
void **bounce_bufs = NULL;
@@ -615,7 +619,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
goto free_bounce_bufs;
}
- call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL);
+ call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL, requestor);
if (!call) {
ret = -ENOMEM;
goto free_bounce_bufs;
@@ -647,9 +651,9 @@ EXPORT_SYMBOL(vbg_hgcm_call);
#ifdef CONFIG_COMPAT
int vbg_hgcm_call32(
- struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
- struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
- int *vbox_status)
+ struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function,
+ u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32,
+ u32 parm_count, int *vbox_status)
{
struct vmmdev_hgcm_function_parameter *parm64 = NULL;
u32 i, size;
@@ -689,7 +693,7 @@ int vbg_hgcm_call32(
goto out_free;
}
- ret = vbg_hgcm_call(gdev, client_id, function, timeout_ms,
+ ret = vbg_hgcm_call(gdev, requestor, client_id, function, timeout_ms,
parm64, parm_count, vbox_status);
if (ret < 0)
goto out_free;
diff --git a/drivers/virt/vboxguest/vboxguest_version.h b/drivers/virt/vboxguest/vboxguest_version.h
index 77f0c8f8a231..84834dad38d5 100644
--- a/drivers/virt/vboxguest/vboxguest_version.h
+++ b/drivers/virt/vboxguest/vboxguest_version.h
@@ -9,11 +9,10 @@
#ifndef __VBOX_VERSION_H__
#define __VBOX_VERSION_H__
-/* Last synced October 4th 2017 */
-#define VBG_VERSION_MAJOR 5
-#define VBG_VERSION_MINOR 2
+#define VBG_VERSION_MAJOR 6
+#define VBG_VERSION_MINOR 0
#define VBG_VERSION_BUILD 0
-#define VBG_SVN_REV 68940
-#define VBG_VERSION_STRING "5.2.0"
+#define VBG_SVN_REV 127566
+#define VBG_VERSION_STRING "6.0.0"
#endif
diff --git a/drivers/virt/vboxguest/vmmdev.h b/drivers/virt/vboxguest/vmmdev.h
index 5e2ae978935d..6337b8d75d96 100644
--- a/drivers/virt/vboxguest/vmmdev.h
+++ b/drivers/virt/vboxguest/vmmdev.h
@@ -98,8 +98,8 @@ struct vmmdev_request_header {
s32 rc;
/** Reserved field no.1. MBZ. */
u32 reserved1;
- /** Reserved field no.2. MBZ. */
- u32 reserved2;
+ /** IN: Requestor information (VMMDEV_REQUESTOR_*) */
+ u32 requestor;
};
VMMDEV_ASSERT_SIZE(vmmdev_request_header, 24);
@@ -247,6 +247,8 @@ struct vmmdev_guest_info {
};
VMMDEV_ASSERT_SIZE(vmmdev_guest_info, 24 + 8);
+#define VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO BIT(0)
+
/** struct vmmdev_guestinfo2 - Guest information report, version 2. */
struct vmmdev_guest_info2 {
/** Header. */
@@ -259,7 +261,7 @@ struct vmmdev_guest_info2 {
u32 additions_build;
/** SVN revision. */
u32 additions_revision;
- /** Feature mask, currently unused. */
+ /** Feature mask. */
u32 additions_features;
/**
* The intentional meaning of this field was:
diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
index de01a6d0059d..a1c61e351d3f 100644
--- a/drivers/xen/privcmd-buf.c
+++ b/drivers/xen/privcmd-buf.c
@@ -140,8 +140,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
- vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
- GFP_KERNEL);
+ vma_priv = kzalloc(struct_size(vma_priv, pages, count), GFP_KERNEL);
if (!vma_priv)
return -ENOMEM;
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index c3e201025ef0..0782ff3c2273 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
if (xen_store_evtchn == 0)
return -ENOENT;
- nonseekable_open(inode, filp);
-
- filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */
+ stream_open(inode, filp);
u = kzalloc(sizeof(*u), GFP_KERNEL);
if (u == NULL)
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index ca08c83168f5..0b37867b5c20 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1515,8 +1515,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
xdr_encode_AFS_StoreStatus(&bp, attr);
- *bp++ = 0; /* position of start of write */
- *bp++ = 0;
+ *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */
+ *bp++ = htonl((u32) attr->ia_size);
*bp++ = 0; /* size of write */
*bp++ = 0;
*bp++ = htonl(attr->ia_size >> 32); /* new file length */
@@ -1564,7 +1564,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
xdr_encode_AFS_StoreStatus(&bp, attr);
- *bp++ = 0; /* position of start of write */
+ *bp++ = htonl(attr->ia_size); /* position of start of write */
*bp++ = 0; /* size of write */
*bp++ = htonl(attr->ia_size); /* new file length */
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 5aa57929e8c2..6e97a42d24d1 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1514,7 +1514,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &vnode->fid);
bp = xdr_encode_YFS_StoreStatus(bp, attr);
- bp = xdr_encode_u64(bp, 0); /* position of start of write */
+ bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */
bp = xdr_encode_u64(bp, 0); /* size of write */
bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */
yfs_check_req(call, bp);
diff --git a/fs/aio.c b/fs/aio.c
index 38b741aef0bf..3490d1fa0e16 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -181,7 +181,7 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool woken;
+ bool done;
bool cancelled;
struct wait_queue_entry wait;
struct work_struct work;
@@ -204,8 +204,7 @@ struct aio_kiocb {
struct kioctx *ki_ctx;
kiocb_cancel_fn *ki_cancel;
- struct iocb __user *ki_user_iocb; /* user's aiocb */
- __u64 ki_user_data; /* user's data for completion */
+ struct io_event ki_res;
struct list_head ki_list; /* the aio core uses this
* for cancellation */
@@ -1022,6 +1021,9 @@ static bool get_reqs_available(struct kioctx *ctx)
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
+ *
+ * The refcount is initialized to 2 - one for the async op completion,
+ * one for the synchronous code that does this.
*/
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
@@ -1031,10 +1033,15 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
if (unlikely(!req))
return NULL;
+ if (unlikely(!get_reqs_available(ctx))) {
+ kmem_cache_free(kiocb_cachep, req);
+ return NULL;
+ }
+
percpu_ref_get(&ctx->reqs);
req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list);
- refcount_set(&req->ki_refcnt, 0);
+ refcount_set(&req->ki_refcnt, 2);
req->ki_eventfd = NULL;
return req;
}
@@ -1067,30 +1074,20 @@ out:
return ret;
}
-static inline void iocb_put(struct aio_kiocb *iocb)
-{
- if (refcount_read(&iocb->ki_refcnt) == 0 ||
- refcount_dec_and_test(&iocb->ki_refcnt)) {
- if (iocb->ki_filp)
- fput(iocb->ki_filp);
- percpu_ref_put(&iocb->ki_ctx->reqs);
- kmem_cache_free(kiocb_cachep, iocb);
- }
-}
-
-static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
- long res, long res2)
+static inline void iocb_destroy(struct aio_kiocb *iocb)
{
- ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
- ev->data = iocb->ki_user_data;
- ev->res = res;
- ev->res2 = res2;
+ if (iocb->ki_eventfd)
+ eventfd_ctx_put(iocb->ki_eventfd);
+ if (iocb->ki_filp)
+ fput(iocb->ki_filp);
+ percpu_ref_put(&iocb->ki_ctx->reqs);
+ kmem_cache_free(kiocb_cachep, iocb);
}
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb)
{
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
@@ -1114,14 +1111,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- aio_fill_event(event, iocb, res, res2);
+ *event = iocb->ki_res;
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
- pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
- res, res2);
+ pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
+ (void __user *)(unsigned long)iocb->ki_res.obj,
+ iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
/* after flagging the request as done, we
* must never even look at it again
@@ -1148,10 +1145,8 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (iocb->ki_eventfd) {
+ if (iocb->ki_eventfd)
eventfd_signal(iocb->ki_eventfd, 1);
- eventfd_ctx_put(iocb->ki_eventfd);
- }
/*
* We have to order our ring_info tail store above and test
@@ -1163,7 +1158,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- iocb_put(iocb);
+}
+
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+ if (refcount_dec_and_test(&iocb->ki_refcnt)) {
+ aio_complete(iocb);
+ iocb_destroy(iocb);
+ }
}
/* aio_read_events_ring
@@ -1437,7 +1439,9 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
file_end_write(kiocb->ki_filp);
}
- aio_complete(iocb, res, res2);
+ iocb->ki_res.res = res;
+ iocb->ki_res.res2 = res2;
+ iocb_put(iocb);
}
static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
@@ -1514,13 +1518,13 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
}
}
-static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
+static int aio_read(struct kiocb *req, const struct iocb *iocb,
bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
struct file *file;
- ssize_t ret;
+ int ret;
ret = aio_prep_rw(req, iocb);
if (ret)
@@ -1542,13 +1546,13 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
return ret;
}
-static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
+static int aio_write(struct kiocb *req, const struct iocb *iocb,
bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
struct file *file;
- ssize_t ret;
+ int ret;
ret = aio_prep_rw(req, iocb);
if (ret)
@@ -1585,11 +1589,10 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work)
{
- struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
- int ret;
+ struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
- ret = vfs_fsync(req->file, req->datasync);
- aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+ iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+ iocb_put(iocb);
}
static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
@@ -1608,11 +1611,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
return 0;
}
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
- aio_complete(iocb, mangle_poll(mask), 0);
-}
-
static void aio_poll_complete_work(struct work_struct *work)
{
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1638,9 +1636,11 @@ static void aio_poll_complete_work(struct work_struct *work)
return;
}
list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
spin_unlock_irq(&ctx->ctx_lock);
- aio_poll_complete(iocb, mask);
+ iocb_put(iocb);
}
/* assumes we are called with irqs disabled */
@@ -1668,31 +1668,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
__poll_t mask = key_to_poll(key);
unsigned long flags;
- req->woken = true;
-
/* for instances that support it check for an event match first: */
- if (mask) {
- if (!(mask & req->events))
- return 0;
+ if (mask && !(mask & req->events))
+ return 0;
+
+ list_del_init(&req->wait.entry);
+ if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
/*
* Try to complete the iocb inline if we can. Use
* irqsave/irqrestore because not all filesystems (e.g. fuse)
* call this function with IRQs disabled and because IRQs
* have to be disabled before ctx_lock is obtained.
*/
- if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
- list_del(&iocb->ki_list);
- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-
- list_del_init(&req->wait.entry);
- aio_poll_complete(iocb, mask);
- return 1;
- }
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
+ spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+ iocb_put(iocb);
+ } else {
+ schedule_work(&req->work);
}
-
- list_del_init(&req->wait.entry);
- schedule_work(&req->work);
return 1;
}
@@ -1719,11 +1715,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
add_wait_queue(head, &pt->iocb->poll.wait);
}
-static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
+static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{
struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll;
struct aio_poll_table apt;
+ bool cancel = false;
__poll_t mask;
/* reject any unknown events outside the normal event mask. */
@@ -1737,7 +1734,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
req->head = NULL;
- req->woken = false;
+ req->done = false;
req->cancelled = false;
apt.pt._qproc = aio_poll_queue_proc;
@@ -1749,156 +1746,135 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
INIT_LIST_HEAD(&req->wait.entry);
init_waitqueue_func_entry(&req->wait, aio_poll_wake);
- /* one for removal from waitqueue, one for this function */
- refcount_set(&aiocb->ki_refcnt, 2);
-
mask = vfs_poll(req->file, &apt.pt) & req->events;
- if (unlikely(!req->head)) {
- /* we did not manage to set up a waitqueue, done */
- goto out;
- }
-
spin_lock_irq(&ctx->ctx_lock);
- spin_lock(&req->head->lock);
- if (req->woken) {
- /* wake_up context handles the rest */
- mask = 0;
+ if (likely(req->head)) {
+ spin_lock(&req->head->lock);
+ if (unlikely(list_empty(&req->wait.entry))) {
+ if (apt.error)
+ cancel = true;
+ apt.error = 0;
+ mask = 0;
+ }
+ if (mask || apt.error) {
+ list_del_init(&req->wait.entry);
+ } else if (cancel) {
+ WRITE_ONCE(req->cancelled, true);
+ } else if (!req->done) { /* actually waiting for an event */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+ spin_unlock(&req->head->lock);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ aiocb->ki_res.res = mangle_poll(mask);
apt.error = 0;
- } else if (mask || apt.error) {
- /* if we get an error or a mask we are done */
- WARN_ON_ONCE(list_empty(&req->wait.entry));
- list_del_init(&req->wait.entry);
- } else {
- /* actually waiting for an event */
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
- aiocb->ki_cancel = aio_poll_cancel;
}
- spin_unlock(&req->head->lock);
spin_unlock_irq(&ctx->ctx_lock);
-
-out:
- if (unlikely(apt.error))
- return apt.error;
-
if (mask)
- aio_poll_complete(aiocb, mask);
- iocb_put(aiocb);
- return 0;
+ iocb_put(aiocb);
+ return apt.error;
}
static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
- struct iocb __user *user_iocb, bool compat)
+ struct iocb __user *user_iocb, struct aio_kiocb *req,
+ bool compat)
{
- struct aio_kiocb *req;
- ssize_t ret;
-
- /* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved2)) {
- pr_debug("EINVAL: reserve field set\n");
- return -EINVAL;
- }
-
- /* prevent overflows */
- if (unlikely(
- (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
- (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
- ((ssize_t)iocb->aio_nbytes < 0)
- )) {
- pr_debug("EINVAL: overflow check\n");
- return -EINVAL;
- }
-
- if (!get_reqs_available(ctx))
- return -EAGAIN;
-
- ret = -EAGAIN;
- req = aio_get_req(ctx);
- if (unlikely(!req))
- goto out_put_reqs_available;
-
req->ki_filp = fget(iocb->aio_fildes);
- ret = -EBADF;
if (unlikely(!req->ki_filp))
- goto out_put_req;
+ return -EBADF;
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+ struct eventfd_ctx *eventfd;
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
- if (IS_ERR(req->ki_eventfd)) {
- ret = PTR_ERR(req->ki_eventfd);
- req->ki_eventfd = NULL;
- goto out_put_req;
- }
+ eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ req->ki_eventfd = eventfd;
}
- ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
- if (unlikely(ret)) {
+ if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) {
pr_debug("EFAULT: aio_key\n");
- goto out_put_req;
+ return -EFAULT;
}
- req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb->aio_data;
+ req->ki_res.obj = (u64)(unsigned long)user_iocb;
+ req->ki_res.data = iocb->aio_data;
+ req->ki_res.res = 0;
+ req->ki_res.res2 = 0;
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->rw, iocb, false, compat);
- break;
+ return aio_read(&req->rw, iocb, false, compat);
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->rw, iocb, false, compat);
- break;
+ return aio_write(&req->rw, iocb, false, compat);
case IOCB_CMD_PREADV:
- ret = aio_read(&req->rw, iocb, true, compat);
- break;
+ return aio_read(&req->rw, iocb, true, compat);
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->rw, iocb, true, compat);
- break;
+ return aio_write(&req->rw, iocb, true, compat);
case IOCB_CMD_FSYNC:
- ret = aio_fsync(&req->fsync, iocb, false);
- break;
+ return aio_fsync(&req->fsync, iocb, false);
case IOCB_CMD_FDSYNC:
- ret = aio_fsync(&req->fsync, iocb, true);
- break;
+ return aio_fsync(&req->fsync, iocb, true);
case IOCB_CMD_POLL:
- ret = aio_poll(req, iocb);
- break;
+ return aio_poll(req, iocb);
default:
pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
- ret = -EINVAL;
- break;
+ return -EINVAL;
}
-
- /*
- * If ret is 0, we'd either done aio_complete() ourselves or have
- * arranged for that to be done asynchronously. Anything non-zero
- * means that we need to destroy req ourselves.
- */
- if (ret)
- goto out_put_req;
- return 0;
-out_put_req:
- if (req->ki_eventfd)
- eventfd_ctx_put(req->ki_eventfd);
- iocb_put(req);
-out_put_reqs_available:
- put_reqs_available(ctx, 1);
- return ret;
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
bool compat)
{
+ struct aio_kiocb *req;
struct iocb iocb;
+ int err;
if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
return -EFAULT;
- return __io_submit_one(ctx, &iocb, user_iocb, compat);
+ /* enforce forwards compatibility on users */
+ if (unlikely(iocb.aio_reserved2)) {
+ pr_debug("EINVAL: reserve field set\n");
+ return -EINVAL;
+ }
+
+ /* prevent overflows */
+ if (unlikely(
+ (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
+ (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
+ ((ssize_t)iocb.aio_nbytes < 0)
+ )) {
+ pr_debug("EINVAL: overflow check\n");
+ return -EINVAL;
+ }
+
+ req = aio_get_req(ctx);
+ if (unlikely(!req))
+ return -EAGAIN;
+
+ err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
+
+ /* Done with the synchronous reference */
+ iocb_put(req);
+
+ /*
+ * If err is 0, we'd either done aio_complete() ourselves or have
+ * arranged for that to be done asynchronously. Anything non-zero
+ * means that we need to destroy req ourselves.
+ */
+ if (unlikely(err)) {
+ iocb_destroy(req);
+ put_reqs_available(ctx, 1);
+ }
+ return err;
}
/* sys_io_submit:
@@ -1997,24 +1973,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
}
#endif
-/* lookup_kiocb
- * Finds a given iocb for cancellation.
- */
-static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
-{
- struct aio_kiocb *kiocb;
-
- assert_spin_locked(&ctx->ctx_lock);
-
- /* TODO: use a hash or array, this sucks. */
- list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
- if (kiocb->ki_user_iocb == iocb)
- return kiocb;
- }
- return NULL;
-}
-
/* sys_io_cancel:
* Attempts to cancel an iocb previously passed to io_submit. If
* the operation is successfully cancelled, the resulting event is
@@ -2032,6 +1990,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct aio_kiocb *kiocb;
int ret = -EINVAL;
u32 key;
+ u64 obj = (u64)(unsigned long)iocb;
if (unlikely(get_user(key, &iocb->aio_key)))
return -EFAULT;
@@ -2043,10 +2002,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
- kiocb = lookup_kiocb(ctx, iocb);
- if (kiocb) {
- ret = kiocb->ki_cancel(&kiocb->rw);
- list_del_init(&kiocb->ki_list);
+ /* TODO: use a hash or array, this sucks. */
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_res.obj == obj) {
+ ret = kiocb->ki_cancel(&kiocb->rw);
+ list_del_init(&kiocb->ki_list);
+ break;
+ }
}
spin_unlock_irq(&ctx->ctx_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1d49694e6ae3..c5880329ae37 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
*
* This is overestimating in most cases.
*/
- qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
+ qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
spin_lock(&block_rsv->lock);
block_rsv->size = reserve_size;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index eb680b715dd6..e659d9d61107 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
int i;
/* Level sanity check */
- if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL ||
- root_level < 0 || root_level >= BTRFS_MAX_LEVEL ||
+ if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
+ root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
root_level < cur_level) {
btrfs_err_rl(fs_info,
"%s: bad levels, cur_level=%d root_level=%d",
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1869ba8e5981..67a6f7d47402 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2430,8 +2430,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
bitmap_clear(rbio->dbitmap, pagenr, 1);
kunmap(p);
- for (stripe = 0; stripe < rbio->real_stripes; stripe++)
+ for (stripe = 0; stripe < nr_data; stripe++)
kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
+ kunmap(p_page);
}
__free_page(p_page);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index acdad6d658f5..e4e665f422fc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
}
}
-static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
/*
* We use writeback_inodes_sb here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
* from already being in a transaction and our join_transaction doesn't
* have to re-take the fs freeze lock.
*/
- if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+ } else {
+ struct btrfs_pending_snapshot *pending;
+ struct list_head *head = &trans->transaction->pending_snapshots;
+
+ /*
+ * Flush dellaloc for any root that is going to be snapshotted.
+ * This is done to avoid a corrupted version of files, in the
+ * snapshots, that had both buffered and direct IO writes (even
+ * if they were done sequentially) due to an unordered update of
+ * the inode's size on disk.
+ */
+ list_for_each_entry(pending, head, list) {
+ int ret;
+
+ ret = btrfs_start_delalloc_snapshot(pending->root);
+ if (ret)
+ return ret;
+ }
+ }
return 0;
}
-static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
{
- if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+ } else {
+ struct btrfs_pending_snapshot *pending;
+ struct list_head *head = &trans->transaction->pending_snapshots;
+
+ /*
+ * Wait for any dellaloc that we started previously for the roots
+ * that are going to be snapshotted. This is to avoid a corrupted
+ * version of files in the snapshots that had both buffered and
+ * direct IO writes (even if they were done sequentially).
+ */
+ list_for_each_entry(pending, head, list)
+ btrfs_wait_ordered_extents(pending->root,
+ U64_MAX, 0, U64_MAX);
+ }
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
extwriter_counter_dec(cur_trans, trans->type);
- ret = btrfs_start_delalloc_flush(fs_info);
+ ret = btrfs_start_delalloc_flush(trans);
if (ret)
goto cleanup_transaction;
@@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto cleanup_transaction;
- btrfs_wait_delalloc_flush(fs_info);
+ btrfs_wait_delalloc_flush(trans);
btrfs_scrub_pause(fs_info);
/*
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f06454a55e00..561884f60d35 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- /* find the first key from this transaction again */
+ /*
+ * Find the first key from this transaction again. See the note for
+ * log_new_dir_dentries, if we're logging a directory recursively we
+ * won't be holding its i_mutex, which means we can modify the directory
+ * while we're logging it. If we remove an entry between our first
+ * search and this search we'll not find the key again and can just
+ * bail.
+ */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (WARN_ON(ret != 0))
+ if (ret != 0)
goto done;
/*
@@ -4544,6 +4551,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
*size_ret = btrfs_inode_size(path->nodes[0], item);
+ /*
+ * If the in-memory inode's i_size is smaller then the inode
+ * size stored in the btree, return the inode's i_size, so
+ * that we get a correct inode size after replaying the log
+ * when before a power failure we had a shrinking truncate
+ * followed by addition of a new name (rename / new hard link).
+ * Otherwise return the inode size from the btree, to avoid
+ * data loss when replaying a log due to previously doing a
+ * write that expands the inode's size and logging a new name
+ * immediately after.
+ */
+ if (*size_ret > inode->vfs_inode.i_size)
+ *size_ret = inode->vfs_inode.i_size;
}
btrfs_release_path(path);
@@ -4705,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_ram_bytes(leaf, extent);
- ASSERT(len == i_size ||
- (len == fs_info->sectorsize &&
- btrfs_file_extent_compression(leaf, extent) !=
- BTRFS_COMPRESS_NONE) ||
- (len < i_size && i_size < fs_info->sectorsize));
+ BTRFS_FILE_EXTENT_INLINE)
return 0;
- }
len = btrfs_file_extent_num_bytes(leaf, extent);
/* Last extent goes beyond i_size, no need to log a hole. */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9024eee889b9..db934ceae9c1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio)
if (bio_op(bio) == REQ_OP_WRITE)
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
- else
+ else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e3346628efe2..2d61ddda9bf5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -524,6 +524,7 @@ static void ceph_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ceph_inode_info *ci = ceph_inode(inode);
+ kfree(ci->i_symlink);
kmem_cache_free(ceph_inode_cachep, ci);
}
@@ -566,7 +567,6 @@ void ceph_destroy_inode(struct inode *inode)
}
}
- kfree(ci->i_symlink);
while ((n = rb_first(&ci->i_fragtree)) != NULL) {
frag = rb_entry(n, struct ceph_inode_frag, node);
rb_erase(n, &ci->i_fragtree);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f9b71c12cc9f..a05bf1d6e1d0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -559,6 +559,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
tcon->ses->server->echo_interval / HZ);
if (tcon->snapshot_time)
seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
+ if (tcon->handle_timeout)
+ seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
/* convert actimeo and display it in seconds */
seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 38feae812b47..5b18d4585740 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -60,6 +60,12 @@
#define CIFS_MAX_ACTIMEO (1 << 30)
/*
+ * Max persistent and resilient handle timeout (milliseconds).
+ * Windows durable max was 960000 (16 minutes)
+ */
+#define SMB3_MAX_HANDLE_TIMEOUT 960000
+
+/*
* MAX_REQ is the maximum number of requests that WE will send
* on one socket concurrently.
*/
@@ -586,6 +592,7 @@ struct smb_vol {
struct nls_table *local_nls;
unsigned int echo_interval; /* echo interval in secs */
__u64 snapshot_time; /* needed for timewarp tokens */
+ __u32 handle_timeout; /* persistent and durable handle timeout in ms */
unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
};
@@ -1058,6 +1065,7 @@ struct cifs_tcon {
__u32 vol_serial_number;
__le64 vol_create_time;
__u64 snapshot_time; /* for timewarp tokens - timestamp of snapshot */
+ __u32 handle_timeout; /* persistent and durable handle timeout in ms */
__u32 ss_flags; /* sector size flags */
__u32 perf_sector_size; /* best sector size for perf */
__u32 max_chunks;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a8e9738db691..4c0e44489f21 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -103,7 +103,7 @@ enum {
Opt_cruid, Opt_gid, Opt_file_mode,
Opt_dirmode, Opt_port,
Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo,
- Opt_echo_interval, Opt_max_credits,
+ Opt_echo_interval, Opt_max_credits, Opt_handletimeout,
Opt_snapshot,
/* Mount options which take string value */
@@ -208,6 +208,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_rsize, "rsize=%s" },
{ Opt_wsize, "wsize=%s" },
{ Opt_actimeo, "actimeo=%s" },
+ { Opt_handletimeout, "handletimeout=%s" },
{ Opt_echo_interval, "echo_interval=%s" },
{ Opt_max_credits, "max_credits=%s" },
{ Opt_snapshot, "snapshot=%s" },
@@ -1619,6 +1620,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->actimeo = CIFS_DEF_ACTIMEO;
+ /* Most clients set timeout to 0, allows server to use its default */
+ vol->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
+
/* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
vol->ops = &smb30_operations;
vol->vals = &smbdefault_values;
@@ -2017,6 +2021,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
break;
+ case Opt_handletimeout:
+ if (get_option_ul(args, &option)) {
+ cifs_dbg(VFS, "%s: Invalid handletimeout value\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
+ vol->handle_timeout = option;
+ if (vol->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
+ cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_echo_interval:
if (get_option_ul(args, &option)) {
cifs_dbg(VFS, "%s: Invalid echo interval value\n",
@@ -3183,6 +3199,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info)
return 0;
if (tcon->snapshot_time != volume_info->snapshot_time)
return 0;
+ if (tcon->handle_timeout != volume_info->handle_timeout)
+ return 0;
return 1;
}
@@ -3297,6 +3315,16 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
tcon->snapshot_time = volume_info->snapshot_time;
}
+ if (volume_info->handle_timeout) {
+ if (ses->server->vals->protocol_id == 0) {
+ cifs_dbg(VFS,
+ "Use SMB2.1 or later for handle timeout option\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
+ } else
+ tcon->handle_timeout = volume_info->handle_timeout;
+ }
+
tcon->ses = ses;
if (volume_info->password) {
tcon->password = kstrdup(volume_info->password, GFP_KERNEL);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index b204e84b87fb..54bffb2a1786 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -68,13 +68,15 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
if (oparms->tcon->use_resilient) {
- nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */
+ /* default timeout is 0, servers pick default (120 seconds) */
+ nr_ioctl_req.Timeout =
+ cpu_to_le32(oparms->tcon->handle_timeout);
nr_ioctl_req.Reserved = 0;
rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid,
fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY,
true /* is_fsctl */,
(char *)&nr_ioctl_req, sizeof(nr_ioctl_req),
- NULL, NULL /* no return info */);
+ CIFSMaxBufSize, NULL, NULL /* no return info */);
if (rc == -EOPNOTSUPP) {
cifs_dbg(VFS,
"resiliency not supported by server, disabling\n");
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 1022a3771e14..00225e699d03 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -581,7 +581,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
NULL /* no data input */, 0 /* no data input */,
- (char **)&out_buf, &ret_data_len);
+ CIFSMaxBufSize, (char **)&out_buf, &ret_data_len);
if (rc == -EOPNOTSUPP) {
cifs_dbg(FYI,
"server does not support query network interfaces\n");
@@ -717,32 +717,28 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
oparms.fid->mid = le64_to_cpu(o_rsp->sync_hdr.MessageId);
#endif /* CIFS_DEBUG2 */
- if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
- oplock = smb2_parse_lease_state(server, o_rsp,
- &oparms.fid->epoch,
- oparms.fid->lease_key);
- else
- goto oshr_exit;
-
-
memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
tcon->crfid.tcon = tcon;
tcon->crfid.is_valid = true;
kref_init(&tcon->crfid.refcount);
- kref_get(&tcon->crfid.refcount);
+ if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
+ kref_get(&tcon->crfid.refcount);
+ oplock = smb2_parse_lease_state(server, o_rsp,
+ &oparms.fid->epoch,
+ oparms.fid->lease_key);
+ } else
+ goto oshr_exit;
qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info))
goto oshr_exit;
- rc = smb2_validate_and_copy_iov(
+ if (!smb2_validate_and_copy_iov(
le16_to_cpu(qi_rsp->OutputBufferOffset),
sizeof(struct smb2_file_all_info),
&rsp_iov[1], sizeof(struct smb2_file_all_info),
- (char *)&tcon->crfid.file_all_info);
- if (rc)
- goto oshr_exit;
- tcon->crfid.file_all_info_is_valid = 1;
+ (char *)&tcon->crfid.file_all_info))
+ tcon->crfid.file_all_info_is_valid = 1;
oshr_exit:
mutex_unlock(&tcon->crfid.fid_mutex);
@@ -1299,7 +1295,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
- NULL, 0 /* no input */,
+ NULL, 0 /* no input */, CIFSMaxBufSize,
(char **)&res_key, &ret_data_len);
if (rc) {
@@ -1404,7 +1400,7 @@ smb2_ioctl_query_info(const unsigned int xid,
rc = SMB2_ioctl_init(tcon, &rqst[1],
COMPOUND_FID, COMPOUND_FID,
qi.info_type, true, NULL,
- 0);
+ 0, CIFSMaxBufSize);
}
} else if (qi.flags == PASSTHRU_QUERY_INFO) {
memset(&qi_iov, 0, sizeof(qi_iov));
@@ -1532,8 +1528,8 @@ smb2_copychunk_range(const unsigned int xid,
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, (char *)pcchunk,
- sizeof(struct copychunk_ioctl), (char **)&retbuf,
- &ret_data_len);
+ sizeof(struct copychunk_ioctl), CIFSMaxBufSize,
+ (char **)&retbuf, &ret_data_len);
if (rc == 0) {
if (ret_data_len !=
sizeof(struct copychunk_ioctl_rsp)) {
@@ -1693,7 +1689,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
true /* is_fctl */,
- &setsparse, 1, NULL, NULL);
+ &setsparse, 1, CIFSMaxBufSize, NULL, NULL);
if (rc) {
tcon->broken_sparse_sup = true;
cifs_dbg(FYI, "set sparse rc = %d\n", rc);
@@ -1766,7 +1762,7 @@ smb2_duplicate_extents(const unsigned int xid,
true /* is_fsctl */,
(char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file),
- NULL,
+ CIFSMaxBufSize, NULL,
&ret_data_len);
if (ret_data_len > 0)
@@ -1801,7 +1797,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
true /* is_fsctl */,
(char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req),
- NULL,
+ CIFSMaxBufSize, NULL,
&ret_data_len);
}
@@ -1809,6 +1805,8 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
/* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */
#define GMT_TOKEN_SIZE 50
+#define MIN_SNAPSHOT_ARRAY_SIZE 16 /* See MS-SMB2 section 3.3.5.15.1 */
+
/*
* Input buffer contains (empty) struct smb_snapshot array with size filled in
* For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2
@@ -1820,13 +1818,29 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
char *retbuf = NULL;
unsigned int ret_data_len = 0;
int rc;
+ u32 max_response_size;
struct smb_snapshot_array snapshot_in;
+ if (get_user(ret_data_len, (unsigned int __user *)ioc_buf))
+ return -EFAULT;
+
+ /*
+ * Note that for snapshot queries that servers like Azure expect that
+ * the first query be minimal size (and just used to get the number/size
+ * of previous versions) so response size must be specified as EXACTLY
+ * sizeof(struct snapshot_array) which is 16 when rounded up to multiple
+ * of eight bytes.
+ */
+ if (ret_data_len == 0)
+ max_response_size = MIN_SNAPSHOT_ARRAY_SIZE;
+ else
+ max_response_size = CIFSMaxBufSize;
+
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SRV_ENUMERATE_SNAPSHOTS,
true /* is_fsctl */,
- NULL, 0 /* no input data */,
+ NULL, 0 /* no input data */, max_response_size,
(char **)&retbuf,
&ret_data_len);
cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n",
@@ -2304,7 +2318,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_DFS_GET_REFERRALS,
true /* is_fsctl */,
- (char *)dfs_req, dfs_req_size,
+ (char *)dfs_req, dfs_req_size, CIFSMaxBufSize,
(char **)&dfs_rsp, &dfs_rsp_size);
} while (rc == -EAGAIN);
@@ -2658,7 +2672,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl_init(tcon, &rqst[num++], cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
true /* is_fctl */, (char *)&fsctl_buf,
- sizeof(struct file_zero_data_information));
+ sizeof(struct file_zero_data_information),
+ CIFSMaxBufSize);
if (rc)
goto zero_range_exit;
@@ -2735,7 +2750,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
true /* is_fctl */, (char *)&fsctl_buf,
- sizeof(struct file_zero_data_information), NULL, NULL);
+ sizeof(struct file_zero_data_information),
+ CIFSMaxBufSize, NULL, NULL);
free_xid(xid);
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 21ac19ff19cb..21ad01d55ab2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1002,7 +1002,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
- (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen);
+ (char *)pneg_inbuf, inbuflen, CIFSMaxBufSize,
+ (char **)&pneg_rsp, &rsplen);
if (rc == -EOPNOTSUPP) {
/*
* Old Windows versions or Netapp SMB server can return
@@ -1858,8 +1859,9 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
}
static struct create_durable_v2 *
-create_durable_v2_buf(struct cifs_fid *pfid)
+create_durable_v2_buf(struct cifs_open_parms *oparms)
{
+ struct cifs_fid *pfid = oparms->fid;
struct create_durable_v2 *buf;
buf = kzalloc(sizeof(struct create_durable_v2), GFP_KERNEL);
@@ -1873,7 +1875,14 @@ create_durable_v2_buf(struct cifs_fid *pfid)
(struct create_durable_v2, Name));
buf->ccontext.NameLength = cpu_to_le16(4);
- buf->dcontext.Timeout = 0; /* Should this be configurable by workload */
+ /*
+ * NB: Handle timeout defaults to 0, which allows server to choose
+ * (most servers default to 120 seconds) and most clients default to 0.
+ * This can be overridden at mount ("handletimeout=") if the user wants
+ * a different persistent (or resilient) handle timeout for all opens
+ * opens on a particular SMB3 mount.
+ */
+ buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout);
buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
generate_random_uuid(buf->dcontext.CreateGuid);
memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
@@ -1926,7 +1935,7 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec,
struct smb2_create_req *req = iov[0].iov_base;
unsigned int num = *num_iovec;
- iov[num].iov_base = create_durable_v2_buf(oparms->fid);
+ iov[num].iov_base = create_durable_v2_buf(oparms);
if (iov[num].iov_base == NULL)
return -ENOMEM;
iov[num].iov_len = sizeof(struct create_durable_v2);
@@ -2478,7 +2487,8 @@ creat_exit:
int
SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen)
+ bool is_fsctl, char *in_data, u32 indatalen,
+ __u32 max_response_size)
{
struct smb2_ioctl_req *req;
struct kvec *iov = rqst->rq_iov;
@@ -2520,16 +2530,21 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
req->OutputCount = 0; /* MBZ */
/*
- * Could increase MaxOutputResponse, but that would require more
- * than one credit. Windows typically sets this smaller, but for some
+ * In most cases max_response_size is set to 16K (CIFSMaxBufSize)
+ * We Could increase default MaxOutputResponse, but that could require
+ * more credits. Windows typically sets this smaller, but for some
* ioctls it may be useful to allow server to send more. No point
* limiting what the server can send as long as fits in one credit
- * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE
- * (by default, note that it can be overridden to make max larger)
- * in responses (except for read responses which can be bigger.
- * We may want to bump this limit up
+ * We can not handle more than CIFS_MAX_BUF_SIZE yet but may want
+ * to increase this limit up in the future.
+ * Note that for snapshot queries that servers like Azure expect that
+ * the first query be minimal size (and just used to get the number/size
+ * of previous versions) so response size must be specified as EXACTLY
+ * sizeof(struct snapshot_array) which is 16 when rounded up to multiple
+ * of eight bytes. Currently that is the only case where we set max
+ * response size smaller.
*/
- req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize);
+ req->MaxOutputResponse = cpu_to_le32(max_response_size);
if (is_fsctl)
req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);
@@ -2550,13 +2565,14 @@ SMB2_ioctl_free(struct smb_rqst *rqst)
cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
}
+
/*
* SMB2 IOCTL is used for both IOCTLs and FSCTLs
*/
int
SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
u64 volatile_fid, u32 opcode, bool is_fsctl,
- char *in_data, u32 indatalen,
+ char *in_data, u32 indatalen, u32 max_out_data_len,
char **out_data, u32 *plen /* returned data len */)
{
struct smb_rqst rqst;
@@ -2593,8 +2609,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rqst.rq_iov = iov;
rqst.rq_nvec = SMB2_IOCTL_IOV_SIZE;
- rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid,
- opcode, is_fsctl, in_data, indatalen);
+ rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid, opcode,
+ is_fsctl, in_data, indatalen, max_out_data_len);
if (rc)
goto ioctl_exit;
@@ -2672,7 +2688,8 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
(char *)&fsctl_input /* data input */,
- 2 /* in data len */, &ret_data /* out data */, NULL);
+ 2 /* in data len */, CIFSMaxBufSize /* max out data */,
+ &ret_data /* out data */, NULL);
cifs_dbg(FYI, "set compression rc %d\n", rc);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 3c32d0cfea69..52df125e9189 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -142,11 +142,12 @@ extern int SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
extern void SMB2_open_free(struct smb_rqst *rqst);
extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen,
+ bool is_fsctl, char *in_data, u32 indatalen, u32 maxoutlen,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen);
+ bool is_fsctl, char *in_data, u32 indatalen,
+ __u32 max_response_size);
extern void SMB2_ioctl_free(struct smb_rqst *rqst);
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 95b5e78c22b1..f25daa207421 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -163,19 +163,24 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static void debugfs_evict_inode(struct inode *inode)
+static void debugfs_i_callback(struct rcu_head *head)
{
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
if (S_ISLNK(inode->i_mode))
kfree(inode->i_link);
+ free_inode_nonrcu(inode);
+}
+
+static void debugfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, debugfs_i_callback);
}
static const struct super_operations debugfs_super_operations = {
.statfs = simple_statfs,
.remount_fs = debugfs_remount,
.show_options = debugfs_show_options,
- .evict_inode = debugfs_evict_inode,
+ .destroy_inode = debugfs_destroy_inode,
};
static void debugfs_release_dentry(struct dentry *dentry)
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index 842e8f749db6..570d71043acf 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -410,7 +410,7 @@ bool fs_validate_description(const struct fs_parameter_description *desc)
for (param = desc->specs; param->name; param++) {
if (param->opt == e->opt &&
param->type != fs_param_is_enum) {
- pr_err("VALIDATE %s: e[%lu] enum val for %s\n",
+ pr_err("VALIDATE %s: e[%tu] enum val for %s\n",
name, e - desc->enums, param->name);
good = false;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ec32fece5e1e..9285dd4f4b1c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -755,11 +755,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
umode_t mode, dev_t dev)
{
struct inode *inode;
- struct resv_map *resv_map;
+ struct resv_map *resv_map = NULL;
- resv_map = resv_map_alloc();
- if (!resv_map)
- return NULL;
+ /*
+ * Reserve maps are only needed for inodes that can have associated
+ * page allocations.
+ */
+ if (S_ISREG(mode) || S_ISLNK(mode)) {
+ resv_map = resv_map_alloc();
+ if (!resv_map)
+ return NULL;
+ }
inode = new_inode(sb);
if (inode) {
@@ -794,8 +800,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
- } else
- kref_put(&resv_map->refs, resv_map_release);
+ } else {
+ if (resv_map)
+ kref_put(&resv_map->refs, resv_map_release);
+ }
return inode;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6aaa30580a2b..07d6ef195d05 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1022,6 +1022,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
if (!ret) {
+ ssize_t ret2;
+
/*
* Open-code file_start_write here to grab freeze protection,
* which will be released by another thread in
@@ -1036,7 +1038,19 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
SB_FREEZE_WRITE);
}
kiocb->ki_flags |= IOCB_WRITE;
- io_rw_done(kiocb, call_write_iter(file, kiocb, &iter));
+
+ ret2 = call_write_iter(file, kiocb, &iter);
+ if (!force_nonblock || ret2 != -EAGAIN) {
+ io_rw_done(kiocb, ret2);
+ } else {
+ /*
+ * If ->needs_lock is true, we're already in async
+ * context.
+ */
+ if (!s->needs_lock)
+ io_async_list_note(WRITE, req, iov_count);
+ ret = -EAGAIN;
+ }
}
out_free:
kfree(iovec);
@@ -1968,7 +1982,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return 0;
if (sig) {
- ret = set_user_sigmask(sig, &ksigmask, &sigsaved, sigsz);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+ &ksigmask, &sigsaved, sigsz);
+ else
+#endif
+ ret = set_user_sigmask(sig, &ksigmask,
+ &sigsaved, sigsz);
+
if (ret)
return ret;
}
@@ -2193,6 +2215,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
fput(ctx->user_files[i]);
kfree(ctx->user_files);
+ ctx->user_files = NULL;
ctx->nr_user_files = 0;
return ret;
}
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 389ea53ea487..bccfc40b3a74 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1414,11 +1414,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
- if (f->target) {
- kfree(f->target);
- f->target = NULL;
- }
-
fds = f->dents;
while(fds) {
fd = fds;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bb6ae387469f..05d892c79339 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -47,7 +47,10 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
static void jffs2_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
+ struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+
+ kfree(f->target);
+ kmem_cache_free(jffs2_inode_cachep, f);
}
static void jffs2_destroy_inode(struct inode *inode)
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 93fb7cf0b92b..f0b5c987d6ae 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -290,12 +290,11 @@ void nlmclnt_release_host(struct nlm_host *host)
WARN_ON_ONCE(host->h_server);
- if (refcount_dec_and_test(&host->h_count)) {
+ if (refcount_dec_and_mutex_lock(&host->h_count, &nlm_host_mutex)) {
WARN_ON_ONCE(!list_empty(&host->h_lockowners));
WARN_ON_ONCE(!list_empty(&host->h_granted));
WARN_ON_ONCE(!list_empty(&host->h_reclaim));
- mutex_lock(&nlm_host_mutex);
nlm_destroy_host_locked(host);
mutex_unlock(&nlm_host_mutex);
}
diff --git a/fs/locks.c b/fs/locks.c
index eaa1cfaf73b0..71d0c6c2aac5 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1160,6 +1160,11 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
*/
error = -EDEADLK;
spin_lock(&blocked_lock_lock);
+ /*
+ * Ensure that we don't find any locks blocked on this
+ * request during deadlock detection.
+ */
+ __locks_wake_up_blocks(request);
if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
__locks_insert_block(fl, request,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index fb1cf1a4bda2..90d71fda65ce 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -453,7 +453,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
case XPRT_TRANSPORT_RDMA:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
- if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
+ if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f9264e1922a2..6673d4ff5a2a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1289,6 +1289,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
static int ff_layout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ int new_idx = hdr->pgio_mirror_idx;
int err;
trace_nfs4_pnfs_read(hdr, task->tk_status);
@@ -1307,7 +1308,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
case -NFS4ERR_RESET_TO_PNFS:
if (ff_layout_choose_best_ds_for_read(hdr->lseg,
hdr->pgio_mirror_idx + 1,
- &hdr->pgio_mirror_idx))
+ &new_idx))
goto out_layouterror;
set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
@@ -1320,7 +1321,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
return 0;
out_layouterror:
+ ff_layout_read_record_layoutstats_done(task, hdr);
ff_layout_send_layouterror(hdr->lseg);
+ hdr->pgio_mirror_idx = new_idx;
out_eagain:
rpc_restart_call_prepare(task);
return -EAGAIN;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4dbb0ee23432..741ff8c9c6ed 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2933,7 +2933,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
}
out:
- nfs4_sequence_free_slot(&opendata->o_res.seq_res);
+ if (!opendata->cancelled)
+ nfs4_sequence_free_slot(&opendata->o_res.seq_res);
return ret;
}
@@ -6301,7 +6302,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->arg.seqid = seqid;
p->res.seqid = seqid;
p->lsp = lsp;
- refcount_inc(&lsp->ls_count);
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
p->l_ctx = nfs_get_lock_context(ctx);
@@ -6526,7 +6526,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
p->server = server;
- refcount_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
locks_init_lock(&p->fl);
locks_copy_lock(&p->fl, fl);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a35259eebc56..1dc9a08e8bdc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4719,22 +4719,23 @@ out:
/* Lock an inode and grab a bh pointing to the inode. */
int ocfs2_reflink_inodes_lock(struct inode *s_inode,
- struct buffer_head **bh1,
+ struct buffer_head **bh_s,
struct inode *t_inode,
- struct buffer_head **bh2)
+ struct buffer_head **bh_t)
{
- struct inode *inode1;
- struct inode *inode2;
+ struct inode *inode1 = s_inode;
+ struct inode *inode2 = t_inode;
struct ocfs2_inode_info *oi1;
struct ocfs2_inode_info *oi2;
+ struct buffer_head *bh1 = NULL;
+ struct buffer_head *bh2 = NULL;
bool same_inode = (s_inode == t_inode);
+ bool need_swap = (inode1->i_ino > inode2->i_ino);
int status;
/* First grab the VFS and rw locks. */
lock_two_nondirectories(s_inode, t_inode);
- inode1 = s_inode;
- inode2 = t_inode;
- if (inode1->i_ino > inode2->i_ino)
+ if (need_swap)
swap(inode1, inode2);
status = ocfs2_rw_lock(inode1, 1);
@@ -4757,17 +4758,13 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode,
trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
(unsigned long long)oi2->ip_blkno);
- if (*bh1)
- *bh1 = NULL;
- if (*bh2)
- *bh2 = NULL;
-
/* We always want to lock the one with the lower lockid first. */
if (oi1->ip_blkno > oi2->ip_blkno)
mlog_errno(-ENOLCK);
/* lock id1 */
- status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET);
+ status = ocfs2_inode_lock_nested(inode1, &bh1, 1,
+ OI_LS_REFLINK_TARGET);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
@@ -4776,15 +4773,25 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode,
/* lock id2 */
if (!same_inode) {
- status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+ status = ocfs2_inode_lock_nested(inode2, &bh2, 1,
OI_LS_REFLINK_TARGET);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
goto out_cl1;
}
- } else
- *bh2 = *bh1;
+ } else {
+ bh2 = bh1;
+ }
+
+ /*
+ * If we swapped inode order above, we have to swap the buffer heads
+ * before passing them back to the caller.
+ */
+ if (need_swap)
+ swap(bh1, bh2);
+ *bh_s = bh1;
+ *bh_t = bh2;
trace_ocfs2_double_lock_end(
(unsigned long long)oi1->ip_blkno,
@@ -4794,8 +4801,7 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode,
out_cl1:
ocfs2_inode_unlock(inode1, 1);
- brelse(*bh1);
- *bh1 = NULL;
+ brelse(bh1);
out_rw2:
ocfs2_rw_unlock(inode2, 1);
out_i2:
diff --git a/fs/open.c b/fs/open.c
index 0285ce7dbd51..a00350018a47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -733,6 +733,12 @@ static int do_dentry_open(struct file *f,
return 0;
}
+ /* Any file opened for execve()/uselib() has to be a regular file. */
+ if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
+ error = -EACCES;
+ goto cleanup_file;
+ }
+
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
@@ -1209,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL(nonseekable_open);
+
+/*
+ * stream_open is used by subsystems that want stream-like file descriptors.
+ * Such file descriptors are not seekable and don't have notion of position
+ * (file.f_pos is always 0). Contrary to file descriptors of other regular
+ * files, .read() and .write() can run simultaneously.
+ *
+ * stream_open never fails and is marked to return int so that it could be
+ * directly used as file_operations.open .
+ */
+int stream_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+ filp->f_mode |= FMODE_STREAM;
+ return 0;
+}
+
+EXPORT_SYMBOL(stream_open);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ddef482f1334..6a803a0b75df 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -616,24 +616,25 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- long nr;
- unsigned long args[6], sp, pc;
+ struct syscall_info info;
+ u64 *args = &info.data.args[0];
int res;
res = lock_trace(task);
if (res)
return res;
- if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+ if (task_current_syscall(task, &info))
seq_puts(m, "running\n");
- else if (nr < 0)
- seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+ else if (info.data.nr < 0)
+ seq_printf(m, "%d 0x%llx 0x%llx\n",
+ info.data.nr, info.sp, info.data.instruction_pointer);
else
seq_printf(m,
- "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
- nr,
+ "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
+ info.data.nr,
args[0], args[1], args[2], args[3], args[4], args[5],
- sp, pc);
+ info.sp, info.data.instruction_pointer);
unlock_trace(task);
return 0;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d29d869abec1..f5834488b67d 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -615,7 +615,7 @@ static void __init proc_kcore_text_init(void)
/*
* MODULES_VADDR has no intersection with VMALLOC_ADDR.
*/
-struct kcore_list kcore_modules;
+static struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 4d598a399bbf..d65390727541 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1626,7 +1626,8 @@ static void drop_sysctl_table(struct ctl_table_header *header)
if (--header->nreg)
return;
- put_links(header);
+ if (parent)
+ put_links(header);
start_unregistering(header);
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/read_write.c b/fs/read_write.c
index 177ccc3d405a..61b43ad7608e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
static inline loff_t file_pos_read(struct file *file)
{
- return file->f_pos;
+ return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
}
static inline void file_pos_write(struct file *file, loff_t pos)
{
- file->f_pos = pos;
+ if ((file->f_mode & FMODE_STREAM) == 0)
+ file->f_pos = pos;
}
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 8dc2818fdd84..12628184772c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -276,14 +276,12 @@ static void ubifs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
struct ubifs_inode *ui = ubifs_inode(inode);
+ kfree(ui->data);
kmem_cache_free(ubifs_inode_slab, ui);
}
static void ubifs_destroy_inode(struct inode *inode)
{
- struct ubifs_inode *ui = ubifs_inode(inode);
-
- kfree(ui->data);
call_rcu(&inode->i_rcu, ubifs_i_callback);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 48502cb9990f..4637ae1ae91c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1191,7 +1191,10 @@ xfs_iread_extents(
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
*/
level = be16_to_cpu(block->bb_level);
- ASSERT(level > 0);
+ if (unlikely(level == 0)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
@@ -4249,9 +4252,13 @@ xfs_bmapi_write(
struct xfs_bmbt_irec *mval, /* output: map values */
int *nmap) /* i/o: mval size/count */
{
+ struct xfs_bmalloca bma = {
+ .tp = tp,
+ .ip = ip,
+ .total = total,
+ };
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp;
- struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
xfs_fileoff_t end; /* end of mapped file region */
bool eof = false; /* after the end of extents */
int error; /* error return */
@@ -4319,10 +4326,6 @@ xfs_bmapi_write(
eof = true;
if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
bma.prev.br_startoff = NULLFILEOFF;
- bma.tp = tp;
- bma.ip = ip;
- bma.total = total;
- bma.datatype = 0;
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
n = 0;
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 6f94d1f7322d..117910db51b8 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -415,8 +415,17 @@ xchk_btree_check_owner(
struct xfs_btree_cur *cur = bs->cur;
struct check_owner *co;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
+ /*
+ * In theory, xfs_btree_get_block should only give us a null buffer
+ * pointer for the root of a root-in-inode btree type, but we need
+ * to check defensively here in case the cursor state is also screwed
+ * up.
+ */
+ if (bp == NULL) {
+ if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, level);
return 0;
+ }
/*
* We want to cross-reference each btree block with the bnobt
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index f1260b4bfdee..90527b094878 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -574,6 +574,11 @@ xchk_da_btree(
/* Drill another level deeper. */
blkno = be32_to_cpu(key->before);
level++;
+ if (level >= XFS_DA_NODE_MAXDEPTH) {
+ /* Too deep! */
+ xchk_da_set_corrupt(&ds, level - 1);
+ break;
+ }
ds.tree_level--;
error = xchk_da_btree_block(&ds, level, blkno);
if (error)
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 93f07edafd81..9ee2a7d02e70 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -161,6 +161,14 @@ xfs_ioc_trim(
return -EPERM;
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
+
+ /*
+ * We haven't recovered the log, so we cannot use our bnobt-guided
+ * storage zapping commands.
+ */
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ return -EROFS;
+
if (copy_from_user(&range, urange, sizeof(range)))
return -EFAULT;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f2e2845eb76..a7ceae90110e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -529,18 +529,17 @@ xfs_file_dio_aio_write(
count = iov_iter_count(from);
/*
- * If we are doing unaligned IO, wait for all other IO to drain,
- * otherwise demote the lock if we had to take the exclusive lock
- * for other reasons in xfs_file_aio_write_checks.
+ * If we are doing unaligned IO, we can't allow any other overlapping IO
+ * in-flight at the same time or we risk data corruption. Wait for all
+ * other IO to drain before we submit. If the IO is aligned, demote the
+ * iolock if we had to take the exclusive lock in
+ * xfs_file_aio_write_checks() for other reasons.
*/
if (unaligned_io) {
- /* If we are going to wait for other DIO to finish, bail */
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (atomic_read(&inode->i_dio_count))
- return -EAGAIN;
- } else {
- inode_dio_wait(inode);
- }
+ /* unaligned dio always waits, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_dio_wait(inode);
} else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
@@ -548,6 +547,14 @@ xfs_file_dio_aio_write(
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+
+ /*
+ * If unaligned, this is the only IO in-flight. If it has not yet
+ * completed, wait on it before we release the iolock to prevent
+ * subsequent overlapping IO.
+ */
+ if (ret == -EIOCBQUEUED && unaligned_io)
+ inode_dio_wait(inode);
out:
xfs_iunlock(ip, iolock);
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 30b1ae53689f..c50542dc71e0 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -150,7 +150,10 @@
/* Defaults for debug_level, debug and normal */
+#ifndef ACPI_DEBUG_DEFAULT
#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_EVALUATION | ACPI_LV_REPAIR)
+#endif
+
#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_REPAIR)
#define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 9ff328fd946a..624b90b34085 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -82,6 +82,11 @@
#define ACPI_NO_ERROR_MESSAGES
#undef ACPI_DEBUG_OUTPUT
+/* Use a specific bugging default separate from ACPICA */
+
+#undef ACPI_DEBUG_DEFAULT
+#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO | ACPI_LV_REPAIR)
+
/* External interface for __KERNEL__, stub is needed */
#define ACPI_EXTERNAL_RETURN_STATUS(prototype) \
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 0c938a4354f6..b88239e9efe4 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -105,41 +105,30 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
* syscall_get_arguments - extract system call parameter values
* @task: task of interest, must be blocked
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array filled with argument values
*
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5). Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call. First argument is stored in
+* @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n, unsigned long *args);
+ unsigned long *args);
/**
* syscall_set_arguments - change system call parameter value
* @task: task of interest, must be in system call entry tracing
* @regs: task_pt_regs() of @task
- * @i: argument index [0,5]
- * @n: number of arguments; n+i must be [1,6].
* @args: array of argument values to store
*
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call.
+ * The first argument gets value @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
* entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args);
/**
diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
index 8063e8314eef..6d487c5eba2c 100644
--- a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
+++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
@@ -51,7 +51,10 @@
#define RESET_SD_EMMC_A 44
#define RESET_SD_EMMC_B 45
#define RESET_SD_EMMC_C 46
-/* 47-60 */
+/* 47 */
+#define RESET_USB_PHY20 48
+#define RESET_USB_PHY21 49
+/* 50-60 */
#define RESET_AUDIO_CODEC 61
/* 62-63 */
/* RESET2 */
diff --git a/include/keys/trusted.h b/include/keys/trusted.h
index adbcb6817826..0071298b9b28 100644
--- a/include/keys/trusted.h
+++ b/include/keys/trusted.h
@@ -38,7 +38,7 @@ enum {
int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
- unsigned char *h2, unsigned char h3, ...);
+ unsigned char *h2, unsigned int h3, ...);
int TSS_checkhmac1(unsigned char *buffer,
const uint32_t command,
const unsigned char *ononce,
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index d5cfc0b15b76..f6034ba774be 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
#define AARP_RESOLVE_TIME (10 * HZ)
extern struct datalink_proto *ddp_dl, *aarp_dl;
-extern void aarp_proto_init(void);
+extern int aarp_proto_init(void);
/* Inter module exports */
diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h
index 50fb0dee23e8..d35b8ec1c485 100644
--- a/include/linux/bitrev.h
+++ b/include/linux/bitrev.h
@@ -34,41 +34,41 @@ static inline u32 __bitrev32(u32 x)
#define __constant_bitrev32(x) \
({ \
- u32 __x = x; \
- __x = (__x >> 16) | (__x << 16); \
- __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \
- __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \
- __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \
- __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \
- __x; \
+ u32 ___x = x; \
+ ___x = (___x >> 16) | (___x << 16); \
+ ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \
+ ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \
+ ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \
+ ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \
+ ___x; \
})
#define __constant_bitrev16(x) \
({ \
- u16 __x = x; \
- __x = (__x >> 8) | (__x << 8); \
- __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \
- __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \
- __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \
- __x; \
+ u16 ___x = x; \
+ ___x = (___x >> 8) | (___x << 8); \
+ ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \
+ ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \
+ ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \
+ ___x; \
})
#define __constant_bitrev8x4(x) \
({ \
- u32 __x = x; \
- __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \
- __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \
- __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \
- __x; \
+ u32 ___x = x; \
+ ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \
+ ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \
+ ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \
+ ___x; \
})
#define __constant_bitrev8(x) \
({ \
- u8 __x = x; \
- __x = (__x >> 4) | (__x << 4); \
- __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \
- __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \
- __x; \
+ u8 ___x = x; \
+ ___x = (___x >> 4) | (___x << 4); \
+ ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \
+ ___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1); \
+ ___x; \
})
#define bitrev32(x) \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a2132e09dc1c..f02367faa58d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,6 @@ enum bpf_arg_type {
ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */
- ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
};
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 69f7a3449eda..7d8228d1c898 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -66,6 +66,46 @@ struct bpf_reg_state {
* same reference to the socket, to determine proper reference freeing.
*/
u32 id;
+ /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
+ * from a pointer-cast helper, bpf_sk_fullsock() and
+ * bpf_tcp_sock().
+ *
+ * Consider the following where "sk" is a reference counted
+ * pointer returned from "sk = bpf_sk_lookup_tcp();":
+ *
+ * 1: sk = bpf_sk_lookup_tcp();
+ * 2: if (!sk) { return 0; }
+ * 3: fullsock = bpf_sk_fullsock(sk);
+ * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
+ * 5: tp = bpf_tcp_sock(fullsock);
+ * 6: if (!tp) { bpf_sk_release(sk); return 0; }
+ * 7: bpf_sk_release(sk);
+ * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain
+ *
+ * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
+ * "tp" ptr should be invalidated also. In order to do that,
+ * the reg holding "fullsock" and "sk" need to remember
+ * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
+ * such that the verifier can reset all regs which have
+ * ref_obj_id matching the sk_reg->id.
+ *
+ * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
+ * sk_reg->id will stay as NULL-marking purpose only.
+ * After NULL-marking is done, sk_reg->id can be reset to 0.
+ *
+ * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
+ * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
+ *
+ * After "tp = bpf_tcp_sock(fullsock);" at line 5,
+ * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
+ * which is the same as sk_reg->ref_obj_id.
+ *
+ * From the verifier perspective, if sk, fullsock and tp
+ * are not NULL, they are the same ptr with different
+ * reg->type. In particular, bpf_sk_release(tp) is also
+ * allowed and has the same effect as bpf_sk_release(sk).
+ */
+ u32 ref_obj_id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of
* the actual value.
* For pointer types, this represents the variable part of the offset
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 9cd00a37b8d3..6db2d9a6e503 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -148,6 +148,22 @@
#define BCM_LED_SRC_OFF 0xe /* Tied high */
#define BCM_LED_SRC_ON 0xf /* Tied low */
+/*
+ * Broadcom Multicolor LED configurations (expansion register 4)
+ */
+#define BCM_EXP_MULTICOLOR (MII_BCM54XX_EXP_SEL_ER + 0x04)
+#define BCM_LED_MULTICOLOR_IN_PHASE BIT(8)
+#define BCM_LED_MULTICOLOR_LINK_ACT 0x0
+#define BCM_LED_MULTICOLOR_SPEED 0x1
+#define BCM_LED_MULTICOLOR_ACT_FLASH 0x2
+#define BCM_LED_MULTICOLOR_FDX 0x3
+#define BCM_LED_MULTICOLOR_OFF 0x4
+#define BCM_LED_MULTICOLOR_ON 0x5
+#define BCM_LED_MULTICOLOR_ALT 0x6
+#define BCM_LED_MULTICOLOR_FLASH 0x7
+#define BCM_LED_MULTICOLOR_LINK 0x8
+#define BCM_LED_MULTICOLOR_ACT 0x9
+#define BCM_LED_MULTICOLOR_PROGRAM 0xa
/*
* BCM5482: Shadow registers
diff --git a/include/linux/device.h b/include/linux/device.h
index b425a7ee04ce..4e6987e11f68 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -49,8 +49,6 @@ struct bus_attribute {
ssize_t (*store)(struct bus_type *bus, const char *buf, size_t count);
};
-#define BUS_ATTR(_name, _mode, _show, _store) \
- struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store)
#define BUS_ATTR_RW(_name) \
struct bus_attribute bus_attr_##_name = __ATTR_RW(_name)
#define BUS_ATTR_RO(_name) \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b42df09b04c..dd28e7679089 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -158,6 +158,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_OPENED ((__force fmode_t)0x80000)
#define FMODE_CREATED ((__force fmode_t)0x100000)
+/* File is stream-like */
+#define FMODE_STREAM ((__force fmode_t)0x200000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
@@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
+extern int stream_open(struct inode * inode, struct file * filp);
#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ea35263eb76b..11943b60f208 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -203,7 +203,6 @@ static inline void hugetlb_show_meminfo(void)
#define pud_huge(x) 0
#define is_hugepage_only_range(mm, addr, len) 0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
-#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
src_addr, pagep) ({ BUG(); 0; })
#define huge_pte_offset(mm, address, sz) 0
@@ -234,6 +233,13 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
{
BUG();
}
+static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long address,
+ unsigned int flags)
+{
+ BUG();
+ return 0;
+}
#endif /* !CONFIG_HUGETLB_PAGE */
/*
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index c843f4a9c512..da676cdbd727 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -38,12 +38,6 @@ struct vmcoredd_node {
#ifdef CONFIG_PROC_KCORE
void __init kclist_add(struct kcore_list *, void *, size_t, int type);
-static inline
-void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
-{
- m->vaddr = (unsigned long)vaddr;
- kclist_add(m, addr, sz, KCORE_REMAP);
-}
extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
#else
@@ -51,11 +45,6 @@ static inline
void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
}
-
-static inline
-void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
-{
-}
#endif
#endif /* _LINUX_KCORE_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d55c63db09b..79fa4426509c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -28,6 +28,7 @@
#include <linux/irqbypass.h>
#include <linux/swait.h>
#include <linux/refcount.h>
+#include <linux/nospec.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -226,6 +227,32 @@ enum {
READING_SHADOW_PAGE_TABLES,
};
+#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA)
+
+struct kvm_host_map {
+ /*
+ * Only valid if the 'pfn' is managed by the host kernel (i.e. There is
+ * a 'struct page' for it. When using mem= kernel parameter some memory
+ * can be used as guest memory but they are not managed by host
+ * kernel).
+ * If 'pfn' is not managed by the host kernel, this field is
+ * initialized to KVM_UNMAPPED_PAGE.
+ */
+ struct page *page;
+ void *hva;
+ kvm_pfn_t pfn;
+ kvm_pfn_t gfn;
+};
+
+/*
+ * Used to check if the mapping is valid or not. Never use 'kvm_host_map'
+ * directly to check for that.
+ */
+static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
+{
+ return !!map->hva;
+}
+
/*
* Sometimes a large or cross-page mmio needs to be broken up into separate
* exits for userspace servicing.
@@ -513,10 +540,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
- /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
- * the caller has read kvm->online_vcpus before (as is the case
- * for kvm_for_each_vcpu, for example).
- */
+ int num_vcpus = atomic_read(&kvm->online_vcpus);
+ i = array_index_nospec(i, num_vcpus);
+
+ /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
smp_rmb();
return kvm->vcpus[i];
}
@@ -600,6 +627,7 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
+ as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
@@ -731,7 +759,9 @@ struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
@@ -1240,11 +1270,21 @@ struct kvm_device_ops {
*/
void (*destroy)(struct kvm_device *dev);
+ /*
+ * Release is an alternative method to free the device. It is
+ * called when the device file descriptor is closed. Once
+ * release is called, the destroy method will not be called
+ * anymore as the device is removed from the device list of
+ * the VM. kvm->lock is held.
+ */
+ void (*release)(struct kvm_device *dev);
+
int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
unsigned long arg);
+ int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
};
void kvm_device_get(struct kvm_device *dev);
@@ -1305,6 +1345,16 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
+#ifdef CONFIG_HAVE_KVM_NO_POLL
+/* Callback that tells if we must not poll */
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu);
+#else
+static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+#endif /* CONFIG_HAVE_KVM_NO_POLL */
+
#ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
diff --git a/include/linux/list.h b/include/linux/list.h
index 79626b5ab36c..58aa3adf94e6 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -207,7 +207,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
}
/**
- * list_is_first -- tests whether @ list is the first entry in list @head
+ * list_is_first -- tests whether @list is the first entry in list @head
* @list: the entry to test
* @head: the head of the list
*/
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1f3d880b7ca1..dbb6118370c1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -566,7 +566,10 @@ struct mem_cgroup *lock_page_memcg(struct page *page);
void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page_state().
+ */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 6fee8b1a4400..5cd824c1c0ca 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -469,7 +469,7 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising)
if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
advertising))
lcl_adv |= ADVERTISE_PAUSE_CAP;
- if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
advertising))
lcl_adv |= ADVERTISE_PAUSE_ASYM;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 022541dc5dbf..0d0729648844 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -594,6 +594,8 @@ enum mlx5_pagefault_type_flags {
};
struct mlx5_td {
+ /* protects tirs list changes while tirs refresh */
+ struct mutex list_lock;
struct list_head tirs_list;
u32 tdn;
};
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7eade9132f02..4ef4bbe78a1d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -671,7 +671,7 @@ enum vm_fault_reason {
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
-#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf)
+#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf)
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \
VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \
diff --git a/include/linux/net.h b/include/linux/net.h
index 651fca72286c..c606c72311d0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -83,6 +83,12 @@ enum sock_type {
#endif /* ARCH_HAS_SOCKET_TYPES */
+/**
+ * enum sock_shutdown_cmd - Shutdown types
+ * @SHUT_RD: shutdown receptions
+ * @SHUT_WR: shutdown transmissions
+ * @SHUT_RDWR: shutdown receptions/transmissions
+ */
enum sock_shutdown_cmd {
SHUT_RD,
SHUT_WR,
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 4eb26d278046..280ae96dc4c3 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zone, struct page *page,
/*
* Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
- * If specified range includes migrate types other than MOVABLE or CMA,
- * this will fail with -EBUSY.
- *
- * For isolating all pages in the range finally, the caller have to
- * free all pages in the range. test_page_isolated() can be used for
- * test it.
- *
- * The following flags are allowed (they can be combined in a bit mask)
- * SKIP_HWPOISON - ignore hwpoison pages
- * REPORT_FAILURE - report details about the failure to isolate the range
*/
int
start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
diff --git a/include/linux/parport.h b/include/linux/parport.h
index f41f1d041e2c..397607a0c0eb 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -460,7 +460,6 @@ extern size_t parport_ieee1284_epp_read_addr (struct parport *,
void *, size_t, int);
/* IEEE1284.3 functions */
-#define daisy_dev_name "Device ID probe"
extern int parport_daisy_init (struct parport *port);
extern void parport_daisy_fini (struct parport *port);
extern struct pardevice *parport_open (int devnum, const char *name);
@@ -469,18 +468,6 @@ extern ssize_t parport_device_id (int devnum, char *buffer, size_t len);
extern void parport_daisy_deselect_all (struct parport *port);
extern int parport_daisy_select (struct parport *port, int daisy, int mode);
-#ifdef CONFIG_PARPORT_1284
-extern int daisy_drv_init(void);
-extern void daisy_drv_exit(void);
-#else
-static inline int daisy_drv_init(void)
-{
- return 0;
-}
-
-static inline void daisy_drv_exit(void) {}
-#endif
-
/* Lowlevel drivers _can_ call this support function to handle irqs. */
static inline void parport_generic_irq(struct parport *port)
{
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e47ef764f613..820c4ff31bc5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -30,6 +30,7 @@ struct perf_guest_info_callbacks {
int (*is_in_guest)(void);
int (*is_user_mode)(void);
unsigned long (*get_guest_ip)(void);
+ void (*handle_intel_pt_intr)(void);
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h
index a867637e172d..9e46678edb2a 100644
--- a/include/linux/platform_data/gpio/gpio-amd-fch.h
+++ b/include/linux/platform_data/gpio/gpio-amd-fch.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL+ */
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* AMD FCH gpio driver platform-data
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index edb9b040c94c..d5084ebd9f03 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -9,6 +9,13 @@
#include <linux/bug.h> /* For BUG_ON. */
#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
+#include <linux/seccomp.h>
+
+/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */
+struct syscall_info {
+ __u64 sp;
+ struct seccomp_data data;
+};
extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
@@ -407,9 +414,7 @@ static inline void user_single_step_report(struct pt_regs *regs)
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
-extern int task_current_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc);
+extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
#endif
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ae5655197698..e412c092c1e8 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -418,10 +418,20 @@ static inline void set_restore_sigmask(void)
set_thread_flag(TIF_RESTORE_SIGMASK);
WARN_ON(!test_thread_flag(TIF_SIGPENDING));
}
+
+static inline void clear_tsk_restore_sigmask(struct task_struct *tsk)
+{
+ clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK);
+}
+
static inline void clear_restore_sigmask(void)
{
clear_thread_flag(TIF_RESTORE_SIGMASK);
}
+static inline bool test_tsk_restore_sigmask(struct task_struct *tsk)
+{
+ return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK);
+}
static inline bool test_restore_sigmask(void)
{
return test_thread_flag(TIF_RESTORE_SIGMASK);
@@ -439,6 +449,10 @@ static inline void set_restore_sigmask(void)
current->restore_sigmask = true;
WARN_ON(!test_thread_flag(TIF_SIGPENDING));
}
+static inline void clear_tsk_restore_sigmask(struct task_struct *tsk)
+{
+ tsk->restore_sigmask = false;
+}
static inline void clear_restore_sigmask(void)
{
current->restore_sigmask = false;
@@ -447,6 +461,10 @@ static inline bool test_restore_sigmask(void)
{
return current->restore_sigmask;
}
+static inline bool test_tsk_restore_sigmask(struct task_struct *tsk)
+{
+ return tsk->restore_sigmask;
+}
static inline bool test_and_clear_restore_sigmask(void)
{
if (!current->restore_sigmask)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 11b45f7ae405..9449b19c5f10 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -32,6 +32,8 @@
#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U)
+/* Use GFP_DMA32 memory */
+#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U)
/* DEBUG: Store the last owner for bug hunting */
#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U)
/* Panic if kmem_cache_create() fails */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6016daeecee4..b57cd8bf96e2 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -26,7 +26,7 @@ typedef __kernel_sa_family_t sa_family_t;
/*
* 1003.1g requires sa_family_t and that sa_data is char.
*/
-
+
struct sockaddr {
sa_family_t sa_family; /* address family, AF_xxx */
char sa_data[14]; /* 14 bytes of protocol address */
@@ -44,7 +44,7 @@ struct linger {
* system, not 4.3. Thus msg_accrights(len) are now missing. They
* belong in an obscure libc emulation or the bin.
*/
-
+
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
@@ -54,7 +54,7 @@ struct msghdr {
unsigned int msg_flags; /* flags on received message */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
-
+
struct user_msghdr {
void __user *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
@@ -122,7 +122,7 @@ struct cmsghdr {
* inside range, given by msg->msg_controllen before using
* ancillary object DATA. --ANK (980731)
*/
-
+
static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size,
struct cmsghdr *__cmsg)
{
@@ -264,10 +264,10 @@ struct ucred {
/* Maximum queue length specifiable by listen. */
#define SOMAXCONN 128
-/* Flags we can use with send/ and recv.
+/* Flags we can use with send/ and recv.
Added those for 1003.1g not all are supported yet
*/
-
+
#define MSG_OOB 1
#define MSG_PEEK 2
#define MSG_DONTROUTE 4
diff --git a/include/linux/string.h b/include/linux/string.h
index 7927b875f80c..6ab0a6fa512e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -150,6 +150,9 @@ extern void * memscan(void *,int,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMCMP
extern int memcmp(const void *,const void *,__kernel_size_t);
#endif
+#ifndef __HAVE_ARCH_BCMP
+extern int bcmp(const void *,const void *,__kernel_size_t);
+#endif
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h
index a240ed2a0372..ff56c443180c 100644
--- a/include/linux/vbox_utils.h
+++ b/include/linux/vbox_utils.h
@@ -24,15 +24,17 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...);
#define vbg_debug pr_debug
#endif
-int vbg_hgcm_connect(struct vbg_dev *gdev,
+int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor,
struct vmmdev_hgcm_service_location *loc,
u32 *client_id, int *vbox_status);
-int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status);
+int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor,
+ u32 client_id, int *vbox_status);
-int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
- u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms,
- u32 parm_count, int *vbox_status);
+int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id,
+ u32 function, u32 timeout_ms,
+ struct vmmdev_hgcm_function_parameter *parms, u32 parm_count,
+ int *vbox_status);
/**
* Convert a VirtualBox status code to a standard Linux kernel return value.
diff --git a/include/net/act_api.h b/include/net/act_api.h
index c745e9ccfab2..c61a1bf4e3de 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -39,7 +39,7 @@ struct tc_action {
struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
struct gnet_stats_queue __percpu *cpu_qstats;
struct tc_cookie __rcu *act_cookie;
- struct tcf_chain *goto_chain;
+ struct tcf_chain __rcu *goto_chain;
};
#define tcf_index common.tcfa_index
#define tcf_refcnt common.tcfa_refcnt
@@ -90,7 +90,7 @@ struct tc_action_ops {
int (*lookup)(struct net *net, struct tc_action **a, u32 index);
int (*init)(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int,
@@ -181,6 +181,11 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+ struct tcf_chain **handle,
+ struct netlink_ext_ack *newchain);
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+ struct tcf_chain *newchain);
#endif /* CONFIG_NET_CLS_ACT */
static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
diff --git a/include/net/ip.h b/include/net/ip.h
index be3cad9c2e4c..583526aad1d0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -677,7 +677,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
unsigned char __user *data, int optlen);
void ip_options_undo(struct ip_options *opt);
void ip_forward_options(struct sk_buff *skb);
-int ip_options_rcv_srr(struct sk_buff *skb);
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
/*
* Functions provided by ip_sockglue.c
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a68ced28d8f4..12689ddfc24c 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -59,6 +59,7 @@ struct net {
*/
spinlock_t rules_mod_lock;
+ u32 hash_mix;
atomic64_t cookie_gen;
struct list_head list; /* list of network namespaces */
diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index 16a842456189..d9b665151f3d 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -2,16 +2,10 @@
#ifndef __NET_NS_HASH_H__
#define __NET_NS_HASH_H__
-#include <asm/cache.h>
-
-struct net;
+#include <net/net_namespace.h>
static inline u32 net_hash_mix(const struct net *net)
{
-#ifdef CONFIG_NET_NS
- return (u32)(((unsigned long)net) >> ilog2(sizeof(*net)));
-#else
- return 0;
-#endif
+ return net->hash_mix;
}
#endif
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 87499b6b35d6..df5c69db68af 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -166,7 +166,7 @@ struct nci_conn_info {
* According to specification 102 622 chapter 4.4 Pipes,
* the pipe identifier is 7 bits long.
*/
-#define NCI_HCI_MAX_PIPES 127
+#define NCI_HCI_MAX_PIPES 128
struct nci_hci_gate {
u8 gate;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 31284c078d06..a2b38b3deeca 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -378,6 +378,7 @@ struct tcf_chain {
bool flushing;
const struct tcf_proto_ops *tmplt_ops;
void *tmplt_priv;
+ struct rcu_head rcu;
};
struct tcf_block {
@@ -922,6 +923,41 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
sch->qstats.overlimits++;
}
+static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
+{
+ __u32 qlen = qdisc_qlen_sum(sch);
+
+ return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen);
+}
+
+static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
+ __u32 *backlog)
+{
+ struct gnet_stats_queue qstats = { 0 };
+ __u32 len = qdisc_qlen_sum(sch);
+
+ __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
+ *qlen = qstats.qlen;
+ *backlog = qstats.backlog;
+}
+
+static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
+{
+ __u32 qlen, backlog;
+
+ qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
+ qdisc_tree_reduce_backlog(sch, qlen, backlog);
+}
+
+static inline void qdisc_purge_queue(struct Qdisc *sch)
+{
+ __u32 qlen, backlog;
+
+ qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
+ qdisc_reset(sch);
+ qdisc_tree_reduce_backlog(sch, qlen, backlog);
+}
+
static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
{
qh->head = NULL;
@@ -1105,13 +1141,8 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
sch_tree_lock(sch);
old = *pold;
*pold = new;
- if (old != NULL) {
- unsigned int qlen = old->q.qlen;
- unsigned int backlog = old->qstats.backlog;
-
- qdisc_reset(old);
- qdisc_tree_reduce_backlog(old, qlen, backlog);
- }
+ if (old != NULL)
+ qdisc_tree_flush_backlog(old);
sch_tree_unlock(sch);
return old;
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 32ee65a30aff..1c6e6c0766ca 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -61,7 +61,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
unsigned int offset)
{
- struct sctphdr *sh = sctp_hdr(skb);
+ struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
const struct skb_checksum_ops ops = {
.update = sctp_csum_update,
.combine = sctp_csum_combine,
diff --git a/include/net/sock.h b/include/net/sock.h
index 328cb7cb7b0b..8de5ee258b93 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -710,6 +710,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
hlist_add_head_rcu(&sk->sk_node, list);
}
+static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list)
+{
+ sock_hold(sk);
+ hlist_add_tail_rcu(&sk->sk_node, list);
+}
+
static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index ee8d005f56fc..eb8f01c819e6 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -56,7 +56,7 @@ static inline bool is_tcf_gact_goto_chain(const struct tc_action *a)
static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a)
{
- return a->goto_chain->index;
+ return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK;
}
#endif /* __NET_TC_GACT_H */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 61cf7dbb6782..d074b6d60f8a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -36,7 +36,6 @@ struct xdp_umem {
u32 headroom;
u32 chunk_size_nohr;
struct user_struct *user;
- struct pid *pid;
unsigned long address;
refcount_t users;
struct work_struct work;
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index 44a3259ed4a5..b6e0cbc2c71f 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -28,7 +28,7 @@ TRACE_EVENT_FN(sys_enter,
TP_fast_assign(
__entry->id = id;
- syscall_get_arguments(current, regs, 0, 6, __entry->args);
+ syscall_get_arguments(current, regs, __entry->args);
),
TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)",
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 5f24b50c9e88..059dc2bedaf6 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -7,5 +7,7 @@ no-export-headers += kvm.h
endif
ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),)
+ifeq ($(wildcard $(objtree)/arch/$(SRCARCH)/include/generated/uapi/asm/kvm_para.h),)
no-export-headers += kvm_para.h
endif
+endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3c38ac9a92a7..929c8e537a14 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
- * Description
- * Push an element *value* in *map*. *flags* is one of:
- *
- * **BPF_EXIST**
- * If the queue/stack is full, the oldest element is removed to
- * make room for this.
- * Return
- * 0 on success, or a negative error in case of failure.
- *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
* u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
- * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * *skb*, but gets socket from **struct bpf_sock_addr** context.
* Return
* A 8-byte long non-decreasing number.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
- * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
* A 8-byte long non-decreasing number.
*
@@ -2098,52 +2088,52 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * int bpf_rc_repeat(void *ctx)
* Description
* This helper is used in programs implementing IR decoding, to
- * report a successfully decoded key press with *scancode*,
- * *toggle* value in the given *protocol*. The scancode will be
- * translated to a keycode using the rc keymap, and reported as
- * an input key down event. After a period a key up event is
- * generated. This period can be extended by calling either
- * **bpf_rc_keydown**\ () again with the same values, or calling
- * **bpf_rc_repeat**\ ().
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
*
- * Some protocols include a toggle bit, in case the button was
- * released and pressed again between consecutive scancodes.
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
*
* The *ctx* should point to the lirc sample as passed into
* the program.
*
- * The *protocol* is the decoded protocol number (see
- * **enum rc_proto** for some predefined values).
- *
* This helper is only available is the kernel was compiled with
* the **CONFIG_BPF_LIRC_MODE2** configuration option set to
* "**y**".
* Return
* 0
*
- * int bpf_rc_repeat(void *ctx)
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
* Description
* This helper is used in programs implementing IR decoding, to
- * report a successfully decoded repeat key message. This delays
- * the generation of a key up event for previously generated
- * key down event.
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
*
- * Some IR protocols like NEC have a special IR message for
- * repeating last button, for when a button is held down.
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
*
* The *ctx* should point to the lirc sample as passed into
* the program.
*
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
* This helper is only available is the kernel was compiled with
* the **CONFIG_BPF_LIRC_MODE2** configuration option set to
* "**y**".
* Return
* 0
*
- * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
* Description
* Return the cgroup v2 id of the socket associated with the *skb*.
* This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
- * Description
- * Return id of cgroup v2 that is ancestor of cgroup associated
- * with the *skb* at the *ancestor_level*. The root cgroup is at
- * *ancestor_level* zero and each step down the hierarchy
- * increments the level. If *ancestor_level* == level of cgroup
- * associated with *skb*, then return value will be same as that
- * of **bpf_skb_cgroup_id**\ ().
- *
- * The helper is useful to implement policies based on cgroups
- * that are upper in hierarchy than immediate cgroup associated
- * with *skb*.
- *
- * The format of returned id and helper limitations are same as in
- * **bpf_skb_cgroup_id**\ ().
- * Return
- * The id is returned or 0 in case the id could not be retrieved.
- *
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*
- * void* get_local_storage(void *map, u64 flags)
+ * void *bpf_get_local_storage(void *map, u64 flags)
* Description
* Get the pointer to the local storage area.
* The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is
+ * removed to make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
@@ -2343,29 +2343,94 @@ union bpf_attr {
* Return
* 0
*
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * Description
+ * Acquire a spinlock represented by the pointer *lock*, which is
+ * stored as part of a value of a map. Taking the lock allows to
+ * safely update the rest of the fields in that value. The
+ * spinlock can (and must) later be released with a call to
+ * **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ * Spinlocks in BPF programs come with a number of restrictions
+ * and constraints:
+ *
+ * * **bpf_spin_lock** objects are only allowed inside maps of
+ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ * list could be extended in the future).
+ * * BTF description of the map is mandatory.
+ * * The BPF program can take ONE lock at a time, since taking two
+ * or more could cause dead locks.
+ * * Only one **struct bpf_spin_lock** is allowed per map element.
+ * * When the lock is taken, calls (either BPF to BPF or helpers)
+ * are not allowed.
+ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ * allowed inside a spinlock-ed region.
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ * the lock, on all execution paths, before it returns.
+ * * The BPF program can access **struct bpf_spin_lock** only via
+ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ * helpers. Loading or storing data into the **struct
+ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ * * To use the **bpf_spin_lock**\ () helper, the BTF description
+ * of the map value must be a struct and have **struct
+ * bpf_spin_lock** *anyname*\ **;** field at the top level.
+ * Nested lock inside another struct is not allowed.
+ * * The **struct bpf_spin_lock** *lock* field in a map value must
+ * be aligned on a multiple of 4 bytes in that value.
+ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ * the **bpf_spin_lock** field to user space.
+ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ * a BPF program, do not update the **bpf_spin_lock** field.
+ * * **bpf_spin_lock** cannot be on the stack or inside a
+ * networking packet (it can only be inside of a map values).
+ * * **bpf_spin_lock** is available to root only.
+ * * Tracing programs and socket filter programs cannot use
+ * **bpf_spin_lock**\ () due to insufficient preemption checks
+ * (but this may change in the future).
+ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ * Return
+ * 0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * Description
+ * Release the *lock* previously locked by a call to
+ * **bpf_spin_lock**\ (\ *lock*\ ).
+ * Return
+ * 0
+ *
* struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_sock** pointer such
- * that all the fields in bpf_sock can be accessed.
+ * that all the fields in this **bpf_sock** can be accessed.
* Return
- * A **struct bpf_sock** pointer on success, or NULL in
+ * A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure.
*
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_tcp_sock** pointer from a
* **struct bpf_sock** pointer.
- *
* Return
- * A **struct bpf_tcp_sock** pointer on success, or NULL in
+ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
* case of failure.
*
* int bpf_skb_ecn_set_ce(struct sk_buf *skb)
- * Description
- * Sets ECN of IP header to ce (congestion encountered) if
- * current value is ect (ECN capable). Works with IPv6 and IPv4.
- * Return
- * 1 if set, 0 if not set.
+ * Description
+ * Set ECN (Explicit Congestion Notification) field of IP header
+ * to **CE** (Congestion Encountered) if current value is **ECT**
+ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ * and IPv4.
+ * Return
+ * 1 if the **CE** flag is set (either by the current helper call
+ * or because it was already present), 0 if it is not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ * Description
+ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ * **bpf_sk_release**\ () is unnecessary and not allowed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2465,7 +2530,8 @@ union bpf_attr {
FN(spin_unlock), \
FN(sk_fullsock), \
FN(tcp_sock), \
- FN(skb_ecn_set_ce),
+ FN(skb_ecn_set_ce), \
+ FN(get_listener_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 3652b239dad1..d473e5ed044c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1591,7 +1591,7 @@ enum ethtool_link_mode_bit_indices {
static inline int ethtool_validate_speed(__u32 speed)
{
- return speed <= INT_MAX || speed == SPEED_UNKNOWN;
+ return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN;
}
/* Duplex, half or full. */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4dc34f8e29f6..2fe12b40d503 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -986,11 +986,13 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
#define KVM_CAP_EXCEPTION_PAYLOAD 164
#define KVM_CAP_ARM_VM_IPA_SIZE 165
-#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
+#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */
#define KVM_CAP_HYPERV_CPUID 167
-#define KVM_CAP_ARM_SVE 168
-#define KVM_CAP_ARM_PTRAUTH_ADDRESS 169
-#define KVM_CAP_ARM_PTRAUTH_GENERIC 170
+#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168
+#define KVM_CAP_PPC_IRQ_XIVE 169
+#define KVM_CAP_ARM_SVE 170
+#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
+#define KVM_CAP_ARM_PTRAUTH_GENERIC 172
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1215,6 +1217,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3
KVM_DEV_TYPE_ARM_VGIC_ITS,
#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
+ KVM_DEV_TYPE_XIVE,
+#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_MAX,
};
@@ -1438,7 +1442,7 @@ struct kvm_enc_region {
#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
-/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
+/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
/* Available with KVM_CAP_HYPERV_CPUID */
diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h
index 0e68024f36c7..26f39816af14 100644
--- a/include/uapi/linux/vbox_vmmdev_types.h
+++ b/include/uapi/linux/vbox_vmmdev_types.h
@@ -102,6 +102,66 @@ enum vmmdev_request_type {
#define VMMDEVREQ_HGCM_CALL VMMDEVREQ_HGCM_CALL32
#endif
+/* vmmdev_request_header.requestor defines */
+
+/* Requestor user not given. */
+#define VMMDEV_REQUESTOR_USR_NOT_GIVEN 0x00000000
+/* The kernel driver (vboxguest) is the requestor. */
+#define VMMDEV_REQUESTOR_USR_DRV 0x00000001
+/* Some other kernel driver is the requestor. */
+#define VMMDEV_REQUESTOR_USR_DRV_OTHER 0x00000002
+/* The root or a admin user is the requestor. */
+#define VMMDEV_REQUESTOR_USR_ROOT 0x00000003
+/* Regular joe user is making the request. */
+#define VMMDEV_REQUESTOR_USR_USER 0x00000006
+/* User classification mask. */
+#define VMMDEV_REQUESTOR_USR_MASK 0x00000007
+
+/* Kernel mode request. Note this is 0, check for !USERMODE instead. */
+#define VMMDEV_REQUESTOR_KERNEL 0x00000000
+/* User mode request. */
+#define VMMDEV_REQUESTOR_USERMODE 0x00000008
+/* User or kernel mode classification mask. */
+#define VMMDEV_REQUESTOR_MODE_MASK 0x00000008
+
+/* Don't know the physical console association of the requestor. */
+#define VMMDEV_REQUESTOR_CON_DONT_KNOW 0x00000000
+/*
+ * The request originates with a process that is NOT associated with the
+ * physical console.
+ */
+#define VMMDEV_REQUESTOR_CON_NO 0x00000010
+/* Requestor process is associated with the physical console. */
+#define VMMDEV_REQUESTOR_CON_YES 0x00000020
+/* Console classification mask. */
+#define VMMDEV_REQUESTOR_CON_MASK 0x00000030
+
+/* Requestor is member of special VirtualBox user group. */
+#define VMMDEV_REQUESTOR_GRP_VBOX 0x00000080
+
+/* Note: trust level is for windows guests only, linux always uses not-given */
+/* Requestor trust level: Unspecified */
+#define VMMDEV_REQUESTOR_TRUST_NOT_GIVEN 0x00000000
+/* Requestor trust level: Untrusted (SID S-1-16-0) */
+#define VMMDEV_REQUESTOR_TRUST_UNTRUSTED 0x00001000
+/* Requestor trust level: Untrusted (SID S-1-16-4096) */
+#define VMMDEV_REQUESTOR_TRUST_LOW 0x00002000
+/* Requestor trust level: Medium (SID S-1-16-8192) */
+#define VMMDEV_REQUESTOR_TRUST_MEDIUM 0x00003000
+/* Requestor trust level: Medium plus (SID S-1-16-8448) */
+#define VMMDEV_REQUESTOR_TRUST_MEDIUM_PLUS 0x00004000
+/* Requestor trust level: High (SID S-1-16-12288) */
+#define VMMDEV_REQUESTOR_TRUST_HIGH 0x00005000
+/* Requestor trust level: System (SID S-1-16-16384) */
+#define VMMDEV_REQUESTOR_TRUST_SYSTEM 0x00006000
+/* Requestor trust level >= Protected (SID S-1-16-20480, S-1-16-28672) */
+#define VMMDEV_REQUESTOR_TRUST_PROTECTED 0x00007000
+/* Requestor trust level mask */
+#define VMMDEV_REQUESTOR_TRUST_MASK 0x00007000
+
+/* Requestor is using the less trusted user device node (/dev/vboxuser) */
+#define VMMDEV_REQUESTOR_USER_DEVICE 0x00008000
+
/** HGCM service location types. */
enum vmmdev_hgcm_service_location_type {
VMMDEV_HGCM_LOC_INVALID = 0,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8974b3755670..3c18260403dd 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work)
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
struct xdp_frame *xdpf)
{
+ unsigned int hard_start_headroom;
unsigned int frame_size;
void *pkt_data_start;
struct sk_buff *skb;
+ /* Part of headroom was reserved to xdpf */
+ hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
+
/* build_skb need to place skb_shared_info after SKB end, and
* also want to know the memory "truesize". Thus, need to
* know the memory frame size backing xdp_buff.
@@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* is not at a fixed memory location, with mixed length
* packets, which is bad for cache-line hotness.
*/
- frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) +
+ frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- pkt_data_start = xdpf->data - xdpf->headroom;
+ pkt_data_start = xdpf->data - hard_start_headroom;
skb = build_skb(pkt_data_start, frame_size);
if (!skb)
return NULL;
- skb_reserve(skb, xdpf->headroom);
+ skb_reserve(skb, hard_start_headroom);
__skb_put(skb, xdpf->len);
if (xdpf->metasize)
skb_metadata_set(skb, xdpf->metasize);
@@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* - RX ring dev queue index (skb_record_rx_queue)
*/
+ /* Allow SKB to reuse area used by xdp_frame */
+ xdp_scrub_frame(xdpf);
+
return skb;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 2ada5e21dfa6..4a8f390a2b82 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ
}
EXPORT_SYMBOL(bpf_prog_get_type_path);
-static void bpf_evict_inode(struct inode *inode)
-{
- enum bpf_type type;
-
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
-
- if (S_ISLNK(inode->i_mode))
- kfree(inode->i_link);
- if (!bpf_inode_type(inode, &type))
- bpf_any_put(inode->i_private, type);
-}
-
/*
* Display the mount options in /proc/mounts.
*/
@@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
+static void bpf_destroy_inode_deferred(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ enum bpf_type type;
+
+ if (S_ISLNK(inode->i_mode))
+ kfree(inode->i_link);
+ if (!bpf_inode_type(inode, &type))
+ bpf_any_put(inode->i_private, type);
+ free_inode_nonrcu(inode);
+}
+
+static void bpf_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred);
+}
+
static const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = bpf_show_options,
- .evict_inode = bpf_evict_inode,
+ .destroy_inode = bpf_destroy_inode,
};
enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 62f6bced3a3c..afca36f53c49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
void *bpf_map_area_alloc(size_t size, int numa_node)
{
- /* We definitely need __GFP_NORETRY, so OOM killer doesn't
- * trigger under memory pressure as we really just want to
- * fail instead.
+ /* We really just want to fail instead of triggering OOM killer
+ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
+ * which is used for lower order allocation requests.
+ *
+ * It has been observed that higher order allocation requests done by
+ * vmalloc with __GFP_NORETRY being set might fail due to not trying
+ * to reclaim memory from the page cache, thus we set
+ * __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+
+ const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | flags, numa_node);
+ area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+ numa_node);
if (area != NULL)
return area;
}
- return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
- __builtin_return_address(0));
+ return __vmalloc_node_flags_caller(size, numa_node,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+ flags, __builtin_return_address(0));
}
void bpf_map_area_free(void *area)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..6c5a41f7f338 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
int access_size;
s64 msize_smax_value;
u64 msize_umax_value;
- int ptr_id;
+ int ref_obj_id;
int func_id;
};
@@ -346,35 +346,23 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK_OR_NULL;
}
-static bool type_is_refcounted(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET;
-}
-
-static bool type_is_refcounted_or_null(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
-}
-
-static bool reg_is_refcounted(const struct bpf_reg_state *reg)
-{
- return type_is_refcounted(reg->type);
-}
-
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return reg->type == PTR_TO_MAP_VALUE &&
map_value_has_spin_lock(reg->map_ptr);
}
-static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
- return type_is_refcounted_or_null(reg->type);
+ return type == PTR_TO_SOCKET ||
+ type == PTR_TO_SOCKET_OR_NULL ||
+ type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_TCP_SOCK_OR_NULL;
}
-static bool arg_type_is_refcounted(enum bpf_arg_type type)
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
- return type == ARG_PTR_TO_SOCKET;
+ return type == ARG_PTR_TO_SOCK_COMMON;
}
/* Determine whether the function releases some resources allocated by another
@@ -392,6 +380,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_sk_lookup_udp;
}
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_tcp_sock ||
+ func_id == BPF_FUNC_sk_fullsock;
+}
+
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
@@ -466,6 +460,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, ",call_%d", func(env, reg)->callsite);
} else {
verbose(env, "(id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t))
+ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t))
@@ -1901,8 +1897,9 @@ continue_func:
}
frame++;
if (frame >= MAX_CALL_FRAMES) {
- WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
- return -EFAULT;
+ verbose(env, "the call stack of %d frames is too deep !\n",
+ frame);
+ return -E2BIG;
}
goto process_func;
}
@@ -2414,16 +2411,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type))
goto err_type;
- } else if (arg_type == ARG_PTR_TO_SOCKET) {
- expected_type = PTR_TO_SOCKET;
- if (type != expected_type)
- goto err_type;
- if (meta->ptr_id || !reg->id) {
- verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
- meta->ptr_id, reg->id);
- return -EFAULT;
+ if (reg->ref_obj_id) {
+ if (meta->ref_obj_id) {
+ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id,
+ meta->ref_obj_id);
+ return -EFAULT;
+ }
+ meta->ref_obj_id = reg->ref_obj_id;
}
- meta->ptr_id = reg->id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -2740,32 +2736,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true;
}
-static bool check_refcount_ok(const struct bpf_func_proto *fn)
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{
int count = 0;
- if (arg_type_is_refcounted(fn->arg1_type))
+ if (arg_type_may_be_refcounted(fn->arg1_type))
count++;
- if (arg_type_is_refcounted(fn->arg2_type))
+ if (arg_type_may_be_refcounted(fn->arg2_type))
count++;
- if (arg_type_is_refcounted(fn->arg3_type))
+ if (arg_type_may_be_refcounted(fn->arg3_type))
count++;
- if (arg_type_is_refcounted(fn->arg4_type))
+ if (arg_type_may_be_refcounted(fn->arg4_type))
count++;
- if (arg_type_is_refcounted(fn->arg5_type))
+ if (arg_type_may_be_refcounted(fn->arg5_type))
count++;
+ /* A reference acquiring function cannot acquire
+ * another refcounted ptr.
+ */
+ if (is_acquire_function(func_id) && count)
+ return false;
+
/* We only support one arg being unreferenced at the moment,
* which is sufficient for the helper functions we have right now.
*/
return count <= 1;
}
-static int check_func_proto(const struct bpf_func_proto *fn)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
- check_refcount_ok(fn) ? 0 : -EINVAL;
+ check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2801,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
}
static void release_reg_references(struct bpf_verifier_env *env,
- struct bpf_func_state *state, int id)
+ struct bpf_func_state *state,
+ int ref_obj_id)
{
struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].id == id)
+ if (regs[i].ref_obj_id == ref_obj_id)
mark_reg_unknown(env, regs, i);
bpf_for_each_spilled_reg(i, state, reg) {
if (!reg)
continue;
- if (reg_is_refcounted(reg) && reg->id == id)
+ if (reg->ref_obj_id == ref_obj_id)
__mark_reg_unknown(reg);
}
}
@@ -2820,15 +2823,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
* resources. Identify all copies of the same pointer and clear the reference.
*/
static int release_reference(struct bpf_verifier_env *env,
- struct bpf_call_arg_meta *meta)
+ int ref_obj_id)
{
struct bpf_verifier_state *vstate = env->cur_state;
+ int err;
int i;
+ err = release_reference_state(cur_func(env), ref_obj_id);
+ if (err)
+ return err;
+
for (i = 0; i <= vstate->curframe; i++)
- release_reg_references(env, vstate->frame[i], meta->ptr_id);
+ release_reg_references(env, vstate->frame[i], ref_obj_id);
- return release_reference_state(cur_func(env), meta->ptr_id);
+ return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3055,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn);
+ err = check_func_proto(fn, func_id);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -3093,7 +3101,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err;
}
} else if (is_release_function(func_id)) {
- err = release_reference(env, &meta);
+ err = release_reference(env, meta.ref_obj_id);
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -3154,8 +3162,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (id < 0)
return id;
- /* For release_reference() */
+ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id;
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = id;
} else {
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3180,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
+ if (is_ptr_cast_function(func_id))
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -3368,7 +3382,7 @@ do_sim:
*dst_reg = *ptr_reg;
}
ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
- if (!ptr_is_dst_reg)
+ if (!ptr_is_dst_reg && ret)
*dst_reg = tmp;
return !ret ? -EFAULT : 0;
}
@@ -4665,11 +4679,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK;
}
- if (is_null || !(reg_is_refcounted(reg) ||
- reg_may_point_to_spin_lock(reg))) {
- /* We don't need id from this point onwards anymore,
- * thus we should better reset it, so that state
- * pruning has chances to take effect.
+ if (is_null) {
+ /* We don't need id and ref_obj_id from this point
+ * onwards anymore, thus we should better reset it,
+ * so that state pruning has chances to take effect.
+ */
+ reg->id = 0;
+ reg->ref_obj_id = 0;
+ } else if (!reg_may_point_to_spin_lock(reg)) {
+ /* For not-NULL ptr, reg->ref_obj_id will be reset
+ * in release_reg_references().
+ *
+ * reg->id is still used by spin_lock ptr. Other
+ * than spin_lock ptr type, reg->id can be reset.
*/
reg->id = 0;
}
@@ -4684,11 +4706,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *reg, *regs = state->regs;
+ u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id;
int i, j;
- if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
- release_reference_state(state, id);
+ if (ref_obj_id && ref_obj_id == id && is_null)
+ /* regs[regno] is in the " == NULL" branch.
+ * No one could have freed the reference state before
+ * doing the NULL check.
+ */
+ WARN_ON_ONCE(release_reference_state(state, id));
for (i = 0; i < MAX_BPF_REG; i++)
mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -6052,15 +6079,17 @@ static int propagate_liveness(struct bpf_verifier_env *env,
}
/* Propagate read liveness of registers... */
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- /* We don't need to worry about FP liveness because it's read-only */
- for (i = 0; i < BPF_REG_FP; i++) {
- if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
- continue;
- if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
- err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
- &vparent->frame[vstate->curframe]->regs[i]);
- if (err)
- return err;
+ for (frame = 0; frame <= vstate->curframe; frame++) {
+ /* We don't need to worry about FP liveness, it's read-only */
+ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
+ if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
+ continue;
+ if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
+ err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
+ &vparent->frame[frame]->regs[i]);
+ if (err)
+ return err;
+ }
}
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 025f419d16f6..6754f3ecfd94 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -564,6 +564,20 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
}
+static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
+{
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ return true;
+ /*
+ * When CPU hotplug is disabled, then taking the CPU down is not
+ * possible because takedown_cpu() and the architecture and
+ * subsystem specific mechanisms are not available. So the CPU
+ * which would be completely unplugged again needs to stay around
+ * in the current state.
+ */
+ return st->state <= CPUHP_BRINGUP_CPU;
+}
+
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
@@ -574,8 +588,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
if (ret) {
- st->target = prev_state;
- undo_cpu_up(cpu, st);
+ if (can_rollback_cpu(st)) {
+ st->target = prev_state;
+ undo_cpu_up(cpu, st);
+ }
break;
}
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 771e93f9c43f..6f357f4fc859 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -29,6 +29,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/cn_proc.h>
#include <linux/compat.h>
+#include <linux/sched/signal.h>
/*
* Access another process' address space via ptrace.
@@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request,
ret = ptrace_setsiginfo(child, &siginfo);
break;
- case PTRACE_GETSIGMASK:
+ case PTRACE_GETSIGMASK: {
+ sigset_t *mask;
+
if (addr != sizeof(sigset_t)) {
ret = -EINVAL;
break;
}
- if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t)))
+ if (test_tsk_restore_sigmask(child))
+ mask = &child->saved_sigmask;
+ else
+ mask = &child->blocked;
+
+ if (copy_to_user(datavp, mask, sizeof(sigset_t)))
ret = -EFAULT;
else
ret = 0;
break;
+ }
case PTRACE_SETSIGMASK: {
sigset_t new_set;
@@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request,
child->blocked = new_set;
spin_unlock_irq(&child->sighand->siglock);
+ clear_tsk_restore_sigmask(child);
+
ret = 0;
break;
}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54a0347ca812..df27e499956a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd)
sd->nr = syscall_get_nr(task, regs);
sd->arch = syscall_get_arch();
- syscall_get_arguments(task, regs, 0, 6, args);
+ syscall_get_arguments(task, regs, args);
sd->args[0] = args[0];
sd->args[1] = args[1];
sd->args[2] = args[2];
diff --git a/kernel/signal.c b/kernel/signal.c
index b7953934aa99..f98448cf2def 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3605,16 +3605,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (unlikely(sig != kinfo.si_signo))
goto err;
+ /* Only allow sending arbitrary signals to yourself. */
+ ret = -EPERM;
if ((task_pid(current) != pid) &&
- (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
- /* Only allow sending arbitrary signals to yourself. */
- ret = -EPERM;
- if (kinfo.si_code != SI_USER)
- goto err;
-
- /* Turn this into a regular kill signal. */
- prepare_kill_siginfo(sig, &kinfo);
- }
+ (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
+ goto err;
} else {
prepare_kill_siginfo(sig, &kinfo);
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e5da394d1ca3..c9ec050bcf46 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -128,6 +128,7 @@ static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
+static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
@@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(files_stat.max_files),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
+ .extra1 = &zero_ul,
.extra2 = &long_max,
},
{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa79323331b2..26c8ca9bd06b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1992,7 +1992,7 @@ static void print_bug_type(void)
* modifying the code. @failed should be one of either:
* EFAULT - if the problem happens on reading the @ip address
* EINVAL - if what is read at @ip is not what was expected
- * EPERM - if the problem happens on writting to the @ip address
+ * EPERM - if the problem happens on writing to the @ip address
*/
void ftrace_bug(int failed, struct dyn_ftrace *rec)
{
@@ -2391,7 +2391,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
}
- return -1; /* unknow ftrace bug */
+ return -1; /* unknown ftrace bug */
}
void __weak ftrace_replace_code(int mod_flags)
@@ -3004,7 +3004,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
int cnt;
if (!num_to_init)
- return 0;
+ return NULL;
start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
if (!pg)
@@ -4755,7 +4755,7 @@ static int
ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
int reset, int enable)
{
- return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+ return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
}
/**
@@ -5463,7 +5463,7 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
/*
* The name "destroy_filter_files" is really a misnomer. Although
- * in the future, it may actualy delete the files, but this is
+ * in the future, it may actually delete the files, but this is
* really intended to make sure the ops passed in are disabled
* and that when this function returns, the caller is free to
* free the ops.
@@ -5786,7 +5786,7 @@ void ftrace_module_enable(struct module *mod)
/*
* If the tracing is enabled, go ahead and enable the record.
*
- * The reason not to enable the record immediatelly is the
+ * The reason not to enable the record immediately is the
* inherent check of ftrace_make_nop/ftrace_make_call for
* correct previous instructions. Making first the NOP
* conversion puts the module to the correct state, thus
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index dd1f43588d70..fa100ed3b4de 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -74,7 +74,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type)
static int create_dyn_event(int argc, char **argv)
{
struct dyn_event_operations *ops;
- int ret;
+ int ret = -ENODEV;
if (argv[0][0] == '-' || argv[0][0] == '!')
return dyn_event_release(argc, argv, NULL);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ca46339f3009..795aa2038377 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3713,7 +3713,6 @@ static void track_data_destroy(struct hist_trigger_data *hist_data,
struct trace_event_file *file = hist_data->event_file;
destroy_hist_field(data->track_data.track_var, 0);
- destroy_hist_field(data->track_data.var_ref, 0);
if (data->action == ACTION_SNAPSHOT) {
struct track_data *track_data;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f93a56d2db27..fa8fbff736d6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
+ unsigned long args[6];
int pc;
int syscall_nr;
int size;
@@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
@@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
+ unsigned long args[6];
bool valid_prog_array;
int syscall_nr;
int rctx;
@@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
return;
rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
- (unsigned long *)&rec->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 403c9bd90413..6a5787233113 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -554,13 +554,15 @@ static void softlockup_start_all(void)
int lockup_detector_online_cpu(unsigned int cpu)
{
- watchdog_enable(cpu);
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ watchdog_enable(cpu);
return 0;
}
int lockup_detector_offline_cpu(unsigned int cpu)
{
- watchdog_disable(cpu);
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ watchdog_disable(cpu);
return 0;
}
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ea36dc355da1..b396d328a764 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1528,6 +1528,7 @@ EXPORT_SYMBOL(csum_and_copy_to_iter);
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i)
{
+#ifdef CONFIG_CRYPTO
struct ahash_request *hash = hashp;
struct scatterlist sg;
size_t copied;
@@ -1537,6 +1538,9 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
ahash_request_set_crypt(hash, &sg, NULL, copied);
crypto_ahash_update(hash);
return copied;
+#else
+ return 0;
+#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index 4525fb094844..a8ede77afe0d 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -291,13 +291,14 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
{
const unsigned char *ip = in;
unsigned char *op = out;
+ unsigned char *data_start;
size_t l = in_len;
size_t t = 0;
signed char state_offset = -2;
unsigned int m4_max_offset;
- // LZO v0 will never write 17 as first byte,
- // so this is used to version the bitstream
+ // LZO v0 will never write 17 as first byte (except for zero-length
+ // input), so this is used to version the bitstream
if (bitstream_version > 0) {
*op++ = 17;
*op++ = bitstream_version;
@@ -306,6 +307,8 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
m4_max_offset = M4_MAX_OFFSET_V0;
}
+ data_start = op;
+
while (l > 20) {
size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
uintptr_t ll_end = (uintptr_t) ip + ll;
@@ -324,7 +327,7 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
if (t > 0) {
const unsigned char *ii = in + in_len - t;
- if (op == out && t <= 238) {
+ if (op == data_start && t <= 238) {
*op++ = (17 + t);
} else if (t <= 3) {
op[state_offset] |= t;
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 6d2600ea3b55..9e07e9ef1aad 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -54,11 +54,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
if (unlikely(in_len < 3))
goto input_overrun;
- if (likely(*ip == 17)) {
+ if (likely(in_len >= 5) && likely(*ip == 17)) {
bitstream_version = ip[1];
ip += 2;
- if (unlikely(in_len < 5))
- goto input_overrun;
} else {
bitstream_version = 0;
}
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 0a105d4af166..97f59abc3e92 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -416,8 +416,12 @@ static void rht_deferred_worker(struct work_struct *work)
else if (tbl->nest)
err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
- if (!err)
- err = rhashtable_rehash_table(ht);
+ if (!err || err == -EEXIST) {
+ int nerr;
+
+ nerr = rhashtable_rehash_table(ht);
+ err = err ?: nerr;
+ }
mutex_unlock(&ht->mutex);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 5b382c1244ed..155fe38756ec 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
+ /*
+ * Once the clear bit is set, the bit may be allocated out.
+ *
+ * Orders READ/WRITE on the asssociated instance(such as request
+ * of blk_mq) by this bit for avoiding race with re-allocation,
+ * and its pair is the memory barrier implied in __sbitmap_get_word.
+ *
+ * One invariant is that the clear bit has to be zero when the bit
+ * is in use.
+ */
+ smp_mb__before_atomic();
sbitmap_deferred_clear_bit(&sbq->sb, nr);
/*
diff --git a/lib/string.c b/lib/string.c
index 38e4ca08e757..3ab861c1a857 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -866,6 +866,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count)
EXPORT_SYMBOL(memcmp);
#endif
+#ifndef __HAVE_ARCH_BCMP
+/**
+ * bcmp - returns 0 if and only if the buffers have identical contents.
+ * @a: pointer to first buffer.
+ * @b: pointer to second buffer.
+ * @len: size of buffers.
+ *
+ * The sign or magnitude of a non-zero return value has no particular
+ * meaning, and architectures may implement their own more efficient bcmp(). So
+ * while this particular implementation is a simple (tail) call to memcmp, do
+ * not rely on anything but whether the return value is zero or non-zero.
+ */
+#undef bcmp
+int bcmp(const void *a, const void *b, size_t len)
+{
+ return memcmp(a, b, len);
+}
+EXPORT_SYMBOL(bcmp);
+#endif
+
#ifndef __HAVE_ARCH_MEMSCAN
/**
* memscan - Find a character in an area of memory.
diff --git a/lib/syscall.c b/lib/syscall.c
index 1a7077f20eae..fb328e7ccb08 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -5,16 +5,14 @@
#include <linux/export.h>
#include <asm/syscall.h>
-static int collect_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc)
+static int collect_syscall(struct task_struct *target, struct syscall_info *info)
{
struct pt_regs *regs;
if (!try_get_task_stack(target)) {
/* Task has no stack, so the task isn't in a syscall. */
- *sp = *pc = 0;
- *callno = -1;
+ memset(info, 0, sizeof(*info));
+ info->data.nr = -1;
return 0;
}
@@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno,
return -EAGAIN;
}
- *sp = user_stack_pointer(regs);
- *pc = instruction_pointer(regs);
+ info->sp = user_stack_pointer(regs);
+ info->data.instruction_pointer = instruction_pointer(regs);
- *callno = syscall_get_nr(target, regs);
- if (*callno != -1L && maxargs > 0)
- syscall_get_arguments(target, regs, 0, maxargs, args);
+ info->data.nr = syscall_get_nr(target, regs);
+ if (info->data.nr != -1L)
+ syscall_get_arguments(target, regs,
+ (unsigned long *)&info->data.args[0]);
put_task_stack(target);
return 0;
@@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno,
/**
* task_current_syscall - Discover what a blocked task is doing.
* @target: thread to examine
- * @callno: filled with system call number or -1
- * @args: filled with @maxargs system call arguments
- * @maxargs: number of elements in @args to fill
- * @sp: filled with user stack pointer
- * @pc: filled with user PC
+ * @info: structure with the following fields:
+ * .sp - filled with user stack pointer
+ * .data.nr - filled with system call number or -1
+ * .data.args - filled with @maxargs system call arguments
+ * .data.instruction_pointer - filled with user PC
*
- * If @target is blocked in a system call, returns zero with *@callno
- * set to the the call's number and @args filled in with its arguments.
- * Registers not used for system call arguments may not be available and
- * it is not kosher to use &struct user_regset calls while the system
+ * If @target is blocked in a system call, returns zero with @info.data.nr
+ * set to the the call's number and @info.data.args filled in with its
+ * arguments. Registers not used for system call arguments may not be available
+ * and it is not kosher to use &struct user_regset calls while the system
* call is still in progress. Note we may get this result if @target
* has finished its system call but not yet returned to user mode, such
* as when it's stopped for signal handling or syscall exit tracing.
*
* If @target is blocked in the kernel during a fault or exception,
- * returns zero with *@callno set to -1 and does not fill in @args.
- * If so, it's now safe to examine @target using &struct user_regset
- * get() calls as long as we're sure @target won't return to user mode.
+ * returns zero with *@info.data.nr set to -1 and does not fill in
+ * @info.data.args. If so, it's now safe to examine @target using
+ * &struct user_regset get() calls as long as we're sure @target won't return
+ * to user mode.
*
* Returns -%EAGAIN if @target does not remain blocked.
- *
- * Returns -%EINVAL if @maxargs is too large (maximum is six).
*/
-int task_current_syscall(struct task_struct *target, long *callno,
- unsigned long args[6], unsigned int maxargs,
- unsigned long *sp, unsigned long *pc)
+int task_current_syscall(struct task_struct *target, struct syscall_info *info)
{
long state;
unsigned long ncsw;
- if (unlikely(maxargs > 6))
- return -EINVAL;
-
if (target == current)
- return collect_syscall(target, callno, args, maxargs, sp, pc);
+ return collect_syscall(target, info);
state = target->state;
if (unlikely(!state))
@@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno,
ncsw = wait_task_inactive(target, state);
if (unlikely(!ncsw) ||
- unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
+ unlikely(collect_syscall(target, info)) ||
unlikely(wait_task_inactive(target, state) != ncsw))
return -EAGAIN;
diff --git a/mm/compaction.c b/mm/compaction.c
index f171a83707ce..3319e0872d01 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -242,6 +242,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
bool check_target)
{
struct page *page = pfn_to_online_page(pfn);
+ struct page *block_page;
struct page *end_page;
unsigned long block_pfn;
@@ -267,20 +268,26 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
return false;
+ /* Ensure the start of the pageblock or zone is online and valid */
+ block_pfn = pageblock_start_pfn(pfn);
+ block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+ if (block_page) {
+ page = block_page;
+ pfn = block_pfn;
+ }
+
+ /* Ensure the end of the pageblock or zone is online and valid */
+ block_pfn += pageblock_nr_pages;
+ block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
+ end_page = pfn_to_online_page(block_pfn);
+ if (!end_page)
+ return false;
+
/*
* Only clear the hint if a sample indicates there is either a
* free page or an LRU page in the block. One or other condition
* is necessary for the block to be a migration source/target.
*/
- block_pfn = pageblock_start_pfn(pfn);
- pfn = max(block_pfn, zone->zone_start_pfn);
- page = pfn_to_page(pfn);
- if (zone != page_zone(page))
- return false;
- pfn = block_pfn + pageblock_nr_pages;
- pfn = min(pfn, zone_end_pfn(zone));
- end_page = pfn_to_page(pfn);
-
do {
if (pfn_valid_within(pfn)) {
if (check_source && PageLRU(page)) {
@@ -309,7 +316,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
static void __reset_isolation_suitable(struct zone *zone)
{
unsigned long migrate_pfn = zone->zone_start_pfn;
- unsigned long free_pfn = zone_end_pfn(zone);
+ unsigned long free_pfn = zone_end_pfn(zone) - 1;
unsigned long reset_migrate = free_pfn;
unsigned long reset_free = migrate_pfn;
bool source_set = false;
@@ -1363,7 +1370,7 @@ fast_isolate_freepages(struct compact_control *cc)
count_compact_events(COMPACTISOLATED, nr_isolated);
} else {
/* If isolation fails, abort the search */
- order = -1;
+ order = cc->search_order + 1;
page = NULL;
}
}
diff --git a/mm/debug.c b/mm/debug.c
index c0b31b6c3877..eee9c221280c 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -79,7 +79,7 @@ void __dump_page(struct page *page, const char *reason)
pr_warn("ksm ");
else if (mapping) {
pr_warn("%ps ", mapping->a_ops);
- if (mapping->host->i_dentry.first) {
+ if (mapping->host && mapping->host->i_dentry.first) {
struct dentry *dentry;
dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
pr_warn("name:\"%pd\" ", dentry);
@@ -168,7 +168,7 @@ void dump_mm(const struct mm_struct *mm)
mm_pgtables_bytes(mm),
mm->map_count,
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
- atomic64_read(&mm->pinned_vm),
+ (u64)atomic64_read(&mm->pinned_vm),
mm->data_vm, mm->exec_vm, mm->stack_vm,
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
mm->start_brk, mm->brk, mm->start_stack,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 404acdcd0455..165ea46bf149 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pmd_lock(mm, pmd);
+ if (!pmd_none(*pmd)) {
+ if (write) {
+ if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
+ goto out_unlock;
+ }
+ entry = pmd_mkyoung(*pmd);
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+ update_mmu_cache_pmd(vma, addr, pmd);
+ }
+
+ goto out_unlock;
+ }
+
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pmd_mkdevmap(entry);
@@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm_inc_nr_ptes(mm);
+ pgtable = NULL;
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
+
+out_unlock:
spin_unlock(ptl);
+ if (pgtable)
+ pte_free(mm, pgtable);
}
vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pud_lock(mm, pud);
+ if (!pud_none(*pud)) {
+ if (write) {
+ if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pud(*pud));
+ goto out_unlock;
+ }
+ entry = pud_mkyoung(*pud);
+ entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+ if (pudp_set_access_flags(vma, addr, pud, entry, 1))
+ update_mmu_cache_pud(vma, addr, pud);
+ }
+ goto out_unlock;
+ }
+
entry = pud_mkhuge(pfn_t_pud(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pud_mkdevmap(entry);
@@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
}
set_pud_at(mm, addr, pud, entry);
update_mmu_cache_pud(vma, addr, pud);
+
+out_unlock:
spin_unlock(ptl);
}
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 3e0c11f7d7a1..3ce956efa0cb 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -163,7 +163,10 @@ static inline u8 random_tag(void)
#endif
#ifndef arch_kasan_set_tag
-#define arch_kasan_set_tag(addr, tag) ((void *)(addr))
+static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
+{
+ return addr;
+}
#endif
#ifndef arch_kasan_reset_tag
#define arch_kasan_reset_tag(addr) ((void *)(addr))
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 707fa5579f66..6c318f5ac234 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1529,11 +1529,6 @@ static void kmemleak_scan(void)
}
rcu_read_unlock();
- /* data/bss scanning */
- scan_large_block(_sdata, _edata);
- scan_large_block(__bss_start, __bss_stop);
- scan_large_block(__start_ro_after_init, __end_ro_after_init);
-
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
for_each_possible_cpu(i)
@@ -2071,6 +2066,17 @@ void __init kmemleak_init(void)
}
local_irq_restore(flags);
+ /* register the data/bss sections */
+ create_object((unsigned long)_sdata, _edata - _sdata,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ create_object((unsigned long)__bss_start, __bss_stop - __bss_start,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ /* only register .data..ro_after_init if not within .data */
+ if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata)
+ create_object((unsigned long)__start_ro_after_init,
+ __end_ro_after_init - __start_ro_after_init,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+
/*
* This is the point where tracking allocations is safe. Automatic
* scanning is started during the late initcall. Add the early logged
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 532e0e2a4817..81a0d3914ec9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
return &memcg->cgwb_domain;
}
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page().
+ */
+static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = atomic_long_read(&memcg->stat[idx]);
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+ if (x < 0)
+ x = 0;
+ return x;
+}
+
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
@@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+ *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
- *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+ *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
(1 << LRU_ACTIVE_FILE));
*pheadroom = PAGE_COUNTER_MAX;
diff --git a/mm/memory.c b/mm/memory.c
index 47fe250307c7..ab650c21bccd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1549,10 +1549,12 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte)));
goto out_unlock;
}
- entry = *pte;
- goto out_mkwrite;
- } else
- goto out_unlock;
+ entry = pte_mkyoung(*pte);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (ptep_set_access_flags(vma, addr, pte, entry, 1))
+ update_mmu_cache(vma, addr, pte);
+ }
+ goto out_unlock;
}
/* Ok, finally just insert the thing.. */
@@ -1561,7 +1563,6 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
else
entry = pte_mkspecial(pfn_t_pte(pfn, prot));
-out_mkwrite:
if (mkwrite) {
entry = pte_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f767582af4f8..0082d699be94 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1576,7 +1576,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
{
unsigned long pfn, nr_pages;
long offlined_pages;
- int ret, node;
+ int ret, node, nr_isolate_pageblock;
unsigned long flags;
unsigned long valid_start, valid_end;
struct zone *zone;
@@ -1602,10 +1602,11 @@ static int __ref __offline_pages(unsigned long start_pfn,
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
SKIP_HWPOISON | REPORT_FAILURE);
- if (ret) {
+ if (ret < 0) {
reason = "failure to isolate range";
goto failed_removal;
}
+ nr_isolate_pageblock = ret;
arg.start_pfn = start_pfn;
arg.nr_pages = nr_pages;
@@ -1657,8 +1658,16 @@ static int __ref __offline_pages(unsigned long start_pfn,
/* Ok, all of our target is isolated.
We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
- /* reset pagetype flags and makes migrate type to be MOVABLE */
- undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+
+ /*
+ * Onlining will reset pagetype flags and makes migrate type
+ * MOVABLE, so just need to decrease the number of isolated
+ * pageblocks zone counter here.
+ */
+ spin_lock_irqsave(&zone->lock, flags);
+ zone->nr_isolate_pageblock -= nr_isolate_pageblock;
+ spin_unlock_irqrestore(&zone->lock, flags);
+
/* removal success */
adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
zone->present_pages -= offlined_pages;
@@ -1690,12 +1699,12 @@ static int __ref __offline_pages(unsigned long start_pfn,
failed_removal_isolated:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+ memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal:
pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
- memory_notify(MEM_CANCEL_OFFLINE, &arg);
/* pushback to free area */
mem_hotplug_done();
return ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index af171ccb56a2..2219e747df49 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -428,6 +428,13 @@ static inline bool queue_pages_required(struct page *page,
return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}
+/*
+ * queue_pages_pmd() has three possible return values:
+ * 1 - pages are placed on the right node or queued successfully.
+ * 0 - THP was split.
+ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
+ * page was already on a node that does not follow the policy.
+ */
static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
@@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long flags;
if (unlikely(is_pmd_migration_entry(*pmd))) {
- ret = 1;
+ ret = -EIO;
goto unlock;
}
page = pmd_page(*pmd);
@@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
ret = 1;
flags = qp->flags;
/* go to thp migration */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(walk->vma)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
migrate_page_add(page, qp->pagelist, flags);
+ } else
+ ret = -EIO;
unlock:
spin_unlock(ptl);
out:
@@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
- if (ret)
+ if (ret > 0)
return 0;
+ else if (ret < 0)
+ return ret;
}
if (pmd_trans_unstable(pmd))
@@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
continue;
if (!queue_pages_required(page, qp))
continue;
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(vma))
+ break;
+ migrate_page_add(page, qp->pagelist, flags);
+ } else
+ break;
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
- return 0;
+ return addr != end ? -EIO : 0;
}
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
- if (!vma_migratable(vma))
+ /*
+ * Need check MPOL_MF_STRICT to return -EIO if possible
+ * regardless of vma_migratable
+ */
+ if (!vma_migratable(vma) &&
+ !(flags & MPOL_MF_STRICT))
return 1;
if (endvma > end)
@@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
}
/* queue pages from current vma */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & MPOL_MF_VALID)
return 0;
return 1;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index ac6f4939bb59..663a5449367a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -248,10 +248,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
pte = swp_entry_to_pte(entry);
} else if (is_device_public_page(new)) {
pte = pte_mkdevmap(pte);
- flush_dcache_page(new);
}
- } else
- flush_dcache_page(new);
+ }
#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new)) {
@@ -995,6 +993,13 @@ static int move_to_new_page(struct page *newpage, struct page *page,
*/
if (!PageMappingFlags(page))
page->mapping = NULL;
+
+ if (unlikely(is_zone_device_page(newpage))) {
+ if (is_device_public_page(newpage))
+ flush_dcache_page(newpage);
+ } else
+ flush_dcache_page(newpage);
+
}
out:
return rc;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 03fcf73d47da..d96ca5bc555b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8233,7 +8233,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
ret = start_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype, 0);
- if (ret)
+ if (ret < 0)
return ret;
/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index ce323e56b34d..019280712e1b 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -59,7 +59,8 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
* FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
* We just check MOVABLE pages.
*/
- if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags))
+ if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype,
+ isol_flags))
ret = 0;
/*
@@ -160,27 +161,36 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
return NULL;
}
-/*
- * start_isolate_page_range() -- make page-allocation-type of range of pages
- * to be MIGRATE_ISOLATE.
- * @start_pfn: The lower PFN of the range to be isolated.
- * @end_pfn: The upper PFN of the range to be isolated.
- * @migratetype: migrate type to set in error recovery.
+/**
+ * start_isolate_page_range() - make page-allocation-type of range of pages to
+ * be MIGRATE_ISOLATE.
+ * @start_pfn: The lower PFN of the range to be isolated.
+ * @end_pfn: The upper PFN of the range to be isolated.
+ * start_pfn/end_pfn must be aligned to pageblock_order.
+ * @migratetype: Migrate type to set in error recovery.
+ * @flags: The following flags are allowed (they can be combined in
+ * a bit mask)
+ * SKIP_HWPOISON - ignore hwpoison pages
+ * REPORT_FAILURE - report details about the failure to
+ * isolate the range
*
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
* the range will never be allocated. Any free pages and pages freed in the
- * future will not be allocated again.
- *
- * start_pfn/end_pfn must be aligned to pageblock_order.
- * Return 0 on success and -EBUSY if any part of range cannot be isolated.
+ * future will not be allocated again. If specified range includes migrate types
+ * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
+ * pages in the range finally, the caller have to free all pages in the range.
+ * test_page_isolated() can be used for test it.
*
* There is no high level synchronization mechanism that prevents two threads
- * from trying to isolate overlapping ranges. If this happens, one thread
+ * from trying to isolate overlapping ranges. If this happens, one thread
* will notice pageblocks in the overlapping range already set to isolate.
* This happens in set_migratetype_isolate, and set_migratetype_isolate
- * returns an error. We then clean up by restoring the migration type on
- * pageblocks we may have modified and return -EBUSY to caller. This
+ * returns an error. We then clean up by restoring the migration type on
+ * pageblocks we may have modified and return -EBUSY to caller. This
* prevents two threads from simultaneously working on overlapping ranges.
+ *
+ * Return: the number of isolated pageblocks on success and -EBUSY if any part
+ * of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype, int flags)
@@ -188,6 +198,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned long pfn;
unsigned long undo_pfn;
struct page *page;
+ int nr_isolate_pageblock = 0;
BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
@@ -196,13 +207,15 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
pfn < end_pfn;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
- if (page &&
- set_migratetype_isolate(page, migratetype, flags)) {
- undo_pfn = pfn;
- goto undo;
+ if (page) {
+ if (set_migratetype_isolate(page, migratetype, flags)) {
+ undo_pfn = pfn;
+ goto undo;
+ }
+ nr_isolate_pageblock++;
}
}
- return 0;
+ return nr_isolate_pageblock;
undo:
for (pfn = start_pfn;
pfn < undo_pfn;
diff --git a/mm/slab.c b/mm/slab.c
index 28652e4218e0..47a380a486ee 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2115,6 +2115,8 @@ done:
cachep->allocflags = __GFP_COMP;
if (flags & SLAB_CACHE_DMA)
cachep->allocflags |= GFP_DMA;
+ if (flags & SLAB_CACHE_DMA32)
+ cachep->allocflags |= GFP_DMA32;
if (flags & SLAB_RECLAIM_ACCOUNT)
cachep->allocflags |= __GFP_RECLAIMABLE;
cachep->size = size;
@@ -4306,7 +4308,8 @@ static void show_symbol(struct seq_file *m, unsigned long address)
static int leaks_show(struct seq_file *m, void *p)
{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
+ struct kmem_cache *cachep = list_entry(p, struct kmem_cache,
+ root_caches_node);
struct page *page;
struct kmem_cache_node *n;
const char *name;
diff --git a/mm/slab.h b/mm/slab.h
index e5e6658eeacc..43ac818b8592 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
/* Legal flag mask for kmem_cache_create(), for various configurations */
-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
+ SLAB_CACHE_DMA32 | SLAB_PANIC | \
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
#if defined(CONFIG_DEBUG_SLAB)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 03eeb8b7b4b1..58251ba63e4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
SLAB_FAILSLAB | SLAB_KASAN)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
- SLAB_ACCOUNT)
+ SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
/*
* Merge control. If this is set then no merging of slab caches will occur.
diff --git a/mm/slub.c b/mm/slub.c
index 1b08fbcb7e61..d30ede89f4a6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3589,6 +3589,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= GFP_DMA;
+ if (s->flags & SLAB_CACHE_DMA32)
+ s->allocflags |= GFP_DMA32;
+
if (s->flags & SLAB_RECLAIM_ACCOUNT)
s->allocflags |= __GFP_RECLAIMABLE;
@@ -5679,6 +5682,8 @@ static char *create_unique_id(struct kmem_cache *s)
*/
if (s->flags & SLAB_CACHE_DMA)
*p++ = 'd';
+ if (s->flags & SLAB_CACHE_DMA32)
+ *p++ = 'D';
if (s->flags & SLAB_RECLAIM_ACCOUNT)
*p++ = 'a';
if (s->flags & SLAB_CONSISTENCY_CHECKS)
diff --git a/mm/sparse.c b/mm/sparse.c
index 69904aa6165b..56e057c432f9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -567,7 +567,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-/* Mark all memory sections within the pfn range as online */
+/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
diff --git a/mm/util.c b/mm/util.c
index d559bde497a9..43a2984bccaa 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(vmemdup_user);
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
*
- * Return: newly allocated copy of @s or %NULL in case of error
+ * Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
char *strndup_user(const char __user *s, long n)
{
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 15293c2a5dd8..8d77b6ee4477 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -443,27 +443,29 @@ static int vlan_dev_fcoe_disable(struct net_device *dev)
return rc;
}
-static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
+static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
+ struct scatterlist *sgl, unsigned int sgc)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
- int rc = -EINVAL;
+ int rc = 0;
+
+ if (ops->ndo_fcoe_ddp_target)
+ rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc);
- if (ops->ndo_fcoe_get_wwn)
- rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
return rc;
}
+#endif
-static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
- struct scatterlist *sgl, unsigned int sgc)
+#ifdef NETDEV_FCOE_WWNN
+static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
- int rc = 0;
-
- if (ops->ndo_fcoe_ddp_target)
- rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc);
+ int rc = -EINVAL;
+ if (ops->ndo_fcoe_get_wwn)
+ rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
return rc;
}
#endif
@@ -794,9 +796,11 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
.ndo_fcoe_enable = vlan_dev_fcoe_enable,
.ndo_fcoe_disable = vlan_dev_fcoe_disable,
- .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
.ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target,
#endif
+#ifdef NETDEV_FCOE_WWNN
+ .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
+#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = vlan_dev_poll_controller,
.ndo_netpoll_setup = vlan_dev_netpoll_setup,
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 49a16cee2aae..420a98bf79b5 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = {
static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 };
-void __init aarp_proto_init(void)
+int __init aarp_proto_init(void)
{
+ int rc;
+
aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
- if (!aarp_dl)
+ if (!aarp_dl) {
printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
+ return -ENOMEM;
+ }
timer_setup(&aarp_timer, aarp_expire_timeout, 0);
aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
add_timer(&aarp_timer);
- register_netdevice_notifier(&aarp_notifier);
+ rc = register_netdevice_notifier(&aarp_notifier);
+ if (rc) {
+ del_timer_sync(&aarp_timer);
+ unregister_snap_client(aarp_dl);
+ }
+ return rc;
}
/* Remove the AARP entries associated with a device. */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 795fbc6c06aa..709d2542f729 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B };
EXPORT_SYMBOL(atrtr_get_dev);
EXPORT_SYMBOL(atalk_find_dev_addr);
-static const char atalk_err_snap[] __initconst =
- KERN_CRIT "Unable to register DDP with SNAP.\n";
-
/* Called by proto.c on kernel start up */
static int __init atalk_init(void)
{
@@ -1921,17 +1918,22 @@ static int __init atalk_init(void)
goto out_proto;
ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
- if (!ddp_dl)
- printk(atalk_err_snap);
+ if (!ddp_dl) {
+ pr_crit("Unable to register DDP with SNAP.\n");
+ goto out_sock;
+ }
dev_add_pack(&ltalk_packet_type);
dev_add_pack(&ppptalk_packet_type);
rc = register_netdevice_notifier(&ddp_notifier);
if (rc)
- goto out_sock;
+ goto out_snap;
+
+ rc = aarp_proto_init();
+ if (rc)
+ goto out_dev;
- aarp_proto_init();
rc = atalk_proc_init();
if (rc)
goto out_aarp;
@@ -1945,11 +1947,13 @@ out_proc:
atalk_proc_exit();
out_aarp:
aarp_cleanup_module();
+out_dev:
unregister_netdevice_notifier(&ddp_notifier);
-out_sock:
+out_snap:
dev_remove_pack(&ppptalk_packet_type);
dev_remove_pack(&ltalk_packet_type);
unregister_snap_client(ddp_dl);
+out_sock:
sock_unregister(PF_APPLETALK);
out_proto:
proto_unregister(&ddp_proto);
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a9b7919c9de5..d5df0114f08a 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -104,8 +104,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
- /* free the TID stats immediately */
- cfg80211_sinfo_release_content(&sinfo);
+ if (!ret) {
+ /* free the TID stats immediately */
+ cfg80211_sinfo_release_content(&sinfo);
+ }
dev_put(real_netdev);
if (ret == -ENOENT) {
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ef39aabdb694..4fb01108e5f5 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
const u8 *mac, const unsigned short vid)
{
struct batadv_bla_claim search_claim, *claim;
+ struct batadv_bla_claim *claim_removed_entry;
+ struct hlist_node *claim_removed_node;
ether_addr_copy(search_claim.addr, mac);
search_claim.vid = vid;
@@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__,
mac, batadv_print_vid(vid));
- batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
- batadv_choose_claim, claim);
- batadv_claim_put(claim); /* reference from the hash is gone */
+ claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash,
+ batadv_compare_claim,
+ batadv_choose_claim, claim);
+ if (!claim_removed_node)
+ goto free_claim;
+ /* reference from the hash is gone */
+ claim_removed_entry = hlist_entry(claim_removed_node,
+ struct batadv_bla_claim, hash_entry);
+ batadv_claim_put(claim_removed_entry);
+
+free_claim:
/* don't need the reference from hash_find() anymore */
batadv_claim_put(claim);
}
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0b4b3fb778a6..208655cf6717 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1116,9 +1116,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
struct attribute *attr,
char *buff, size_t count)
{
- struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
struct batadv_hard_iface *hard_iface;
+ struct batadv_priv *bat_priv;
u32 tp_override;
u32 old_tp_override;
bool ret;
@@ -1147,7 +1147,10 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
- batadv_netlink_notify_hardif(bat_priv, hard_iface);
+ if (hard_iface->soft_iface) {
+ bat_priv = netdev_priv(hard_iface->soft_iface);
+ batadv_netlink_notify_hardif(bat_priv, hard_iface);
+ }
out:
batadv_hardif_put(hard_iface);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index f73d79139ae7..26c4e2493ddf 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *tt_global,
const char *message)
{
+ struct batadv_tt_global_entry *tt_removed_entry;
+ struct hlist_node *tt_removed_node;
+
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
batadv_print_vid(tt_global->common.vid), message);
- batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
- batadv_choose_tt, &tt_global->common);
- batadv_tt_global_entry_put(tt_global);
+ tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash,
+ batadv_compare_tt,
+ batadv_choose_tt,
+ &tt_global->common);
+ if (!tt_removed_node)
+ return;
+
+ /* drop reference of remove hash entry */
+ tt_removed_entry = hlist_entry(tt_removed_node,
+ struct batadv_tt_global_entry,
+ common.hash_entry);
+ batadv_tt_global_entry_put(tt_removed_entry);
}
/**
@@ -1337,9 +1349,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid, const char *message,
bool roaming)
{
+ struct batadv_tt_local_entry *tt_removed_entry;
struct batadv_tt_local_entry *tt_local_entry;
u16 flags, curr_flags = BATADV_NO_FLAGS;
- void *tt_entry_exists;
+ struct hlist_node *tt_removed_node;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1368,15 +1381,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
*/
batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
- tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash,
+ tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash,
batadv_compare_tt,
batadv_choose_tt,
&tt_local_entry->common);
- if (!tt_entry_exists)
+ if (!tt_removed_node)
goto out;
- /* extra call to free the local tt entry */
- batadv_tt_local_entry_put(tt_local_entry);
+ /* drop reference of remove hash entry */
+ tt_removed_entry = hlist_entry(tt_removed_node,
+ struct batadv_tt_local_entry,
+ common.hash_entry);
+ batadv_tt_local_entry_put(tt_removed_entry);
out:
if (tt_local_entry)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a0e369179f6d..02da21d771c9 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -601,6 +601,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
if (ipv4_is_local_multicast(group))
return 0;
+ memset(&br_group, 0, sizeof(br_group));
br_group.u.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
@@ -1497,6 +1498,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+ memset(&br_group, 0, sizeof(br_group));
br_group.u.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
@@ -1520,6 +1522,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+ memset(&br_group, 0, sizeof(br_group));
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 9d34de68571b..22afa566cbce 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -502,6 +502,7 @@ static unsigned int br_nf_pre_routing(void *priv,
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IP);
+ skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4;
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 564710f88f93..e88d6641647b 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IPV6);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish_ipv6);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 7e71b0df1fbc..3083988ce729 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -840,6 +840,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
size_t bytes)
{
struct ceph_bio_iter *it = &cursor->bio_iter;
+ struct page *page = bio_iter_page(it->bio, it->iter);
BUG_ON(bytes > cursor->resid);
BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
@@ -851,7 +852,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
return false; /* no more data */
}
- if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+ if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
+ page == bio_iter_page(it->bio, it->iter)))
return false; /* more bytes to process in this segment */
if (!it->iter.bi_size) {
@@ -899,6 +901,7 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
size_t bytes)
{
struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+ struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter);
BUG_ON(bytes > cursor->resid);
BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
@@ -910,7 +913,8 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
return false; /* no more data */
}
- if (!bytes || cursor->bvec_iter.bi_bvec_done)
+ if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
+ page == bvec_iter_page(bvecs, cursor->bvec_iter)))
return false; /* more bytes to process in this segment */
BUG_ON(cursor->last_piece);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b2651bb6d2a3..e657289db4ac 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
- } while (!skb_queue_empty(&sk->sk_receive_queue));
+ } while (sk->sk_receive_queue.prev != *last);
error = -EAGAIN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b67f2aa59dd..fdcff29df915 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5014,8 +5014,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
if (pt_prev->list_func != NULL)
pt_prev->list_func(head, pt_prev, orig_dev);
else
- list_for_each_entry_safe(skb, next, head, list)
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ }
}
static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 78e22cea4cc7..da0a29f30885 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3897,6 +3897,11 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
continue;
}
+ if (!devlink->ops->info_get) {
+ idx++;
+ continue;
+ }
+
mutex_lock(&devlink->lock);
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index b1eb32419732..36ed619faf36 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1797,11 +1797,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
WARN_ON_ONCE(!ret);
gstrings.len = ret;
- data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
- if (gstrings.len && !data)
- return -ENOMEM;
- __ethtool_get_strings(dev, gstrings.string_set, data);
+ if (gstrings.len) {
+ data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
+ if (!data)
+ return -ENOMEM;
+
+ __ethtool_get_strings(dev, gstrings.string_set, data);
+ } else {
+ data = NULL;
+ }
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1897,11 +1902,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return -EFAULT;
stats.n_stats = n_stats;
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (n_stats && !data)
- return -ENOMEM;
- ops->get_ethtool_stats(dev, &stats, data);
+ if (n_stats) {
+ data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!data)
+ return -ENOMEM;
+ ops->get_ethtool_stats(dev, &stats, data);
+ } else {
+ data = NULL;
+ }
ret = -EFAULT;
if (copy_to_user(useraddr, &stats, sizeof(stats)))
@@ -1941,16 +1950,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
return -EFAULT;
stats.n_stats = n_stats;
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (n_stats && !data)
- return -ENOMEM;
- if (dev->phydev && !ops->get_ethtool_phy_stats) {
- ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
- if (ret < 0)
- return ret;
+ if (n_stats) {
+ data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!data)
+ return -ENOMEM;
+
+ if (dev->phydev && !ops->get_ethtool_phy_stats) {
+ ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
+ if (ret < 0)
+ goto out;
+ } else {
+ ops->get_ethtool_phy_stats(dev, &stats, data);
+ }
} else {
- ops->get_ethtool_phy_stats(dev, &stats, data);
+ data = NULL;
}
ret = -EFAULT;
diff --git a/net/core/filter.c b/net/core/filter.c
index f274620945ff..fc92ebc4e200 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
- sk = sk_to_full_sk(sk);
-
return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
}
@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCKET,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
- sk = sk_to_full_sk(sk);
-
if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
return (unsigned long)sk;
@@ -5422,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
};
+BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
+{
+ sk = sk_to_full_sk(sk);
+
+ if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_get_listener_sock_proto = {
+ .func = bpf_get_listener_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+};
+
BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
unsigned int iphdr_len;
@@ -5607,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
+ case BPF_FUNC_get_listener_sock:
+ return &bpf_get_listener_sock_proto;
case BPF_FUNC_skb_ecn_set_ce:
return &bpf_skb_ecn_set_ce_proto;
#endif
@@ -5702,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_release_proto;
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
+ case BPF_FUNC_get_listener_sock:
+ return &bpf_get_listener_sock_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -6596,14 +6613,8 @@ static bool flow_dissector_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
- if (type == BPF_WRITE) {
- switch (off) {
- case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
- break;
- default:
- return false;
- }
- }
+ if (type == BPF_WRITE)
+ return false;
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
@@ -6615,11 +6626,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
info->reg_type = PTR_TO_FLOW_KEYS;
break;
- case bpf_ctx_range(struct __sk_buff, tc_classid):
- case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_till(struct __sk_buff, family, local_port):
- case bpf_ctx_range(struct __sk_buff, tstamp):
- case bpf_ctx_range(struct __sk_buff, wire_len):
+ default:
return false;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index bb1a54747d64..94a450b2191a 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -707,6 +707,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
/* Pass parameters to the BPF program */
memset(flow_keys, 0, sizeof(*flow_keys));
cb->qdisc_cb.flow_keys = flow_keys;
+ flow_keys->n_proto = skb->protocol;
flow_keys->nhoff = skb_network_offset(skb);
flow_keys->thoff = flow_keys->nhoff;
@@ -716,7 +717,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
/* Restore state */
memcpy(cb, &cb_saved, sizeof(cb_saved));
- flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
+ flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff,
+ skb_network_offset(skb), skb->len);
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
flow_keys->nhoff, skb->len);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4ff661f6f989..f8f94303a1f5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -928,6 +928,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
return error;
+ dev_hold(queue->dev);
+
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error) {
@@ -937,7 +939,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
- dev_hold(queue->dev);
return error;
}
@@ -1464,6 +1465,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
return error;
+ dev_hold(queue->dev);
+
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
if (error) {
@@ -1473,7 +1476,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
#endif
kobject_uevent(kobj, KOBJ_ADD);
- dev_hold(queue->dev);
return 0;
}
@@ -1745,16 +1747,20 @@ int netdev_register_kobject(struct net_device *ndev)
error = device_add(dev);
if (error)
- return error;
+ goto error_put_device;
error = register_queue_kobjects(ndev);
- if (error) {
- device_del(dev);
- return error;
- }
+ if (error)
+ goto error_device_del;
pm_runtime_set_memalloc_noio(dev, true);
+ return 0;
+
+error_device_del:
+ device_del(dev);
+error_put_device:
+ put_device(dev);
return error;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 17f36317363d..7e6dcc625701 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
refcount_set(&net->count, 1);
refcount_set(&net->passive, 1);
+ get_random_bytes(&net->hash_mix, sizeof(u32));
net->dev_base_seq = 1;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2415d9cb9b89..ef2cd5712098 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3801,7 +3801,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int delta_truesize;
struct sk_buff *lp;
- if (unlikely(p->len + len >= 65536))
+ if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
lp = NAPI_GRO_CB(p)->last;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index f227f002c73d..db87d9f58019 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
return -ENOMEM;
- return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
+ if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) {
+ kfree(fval.sp.vec);
+ return -ENOMEM;
+ }
+
+ return 0;
}
/**
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d5740bad5b18..57d84e9b7b6f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -436,8 +436,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
- newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index ed4f6dc26365..85c22ada4744 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -98,8 +98,18 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
return skb;
}
+static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ *offset = QCA_HDR_LEN;
+ *proto = ((__be16 *)skb->data)[0];
+
+ return 0;
+}
+
const struct dsa_device_ops qca_netdev_ops = {
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
+ .flow_dissect = qca_tag_flow_dissect,
.overhead = QCA_HDR_LEN,
};
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fd219f7bd3ea..4b0526441476 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb,
len,
htons(ETH_P_TEB),
@@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
goto drop;
if (tunnel->collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
md->version = ver;
md2 = &md->u.md2;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ecce2dc78f17..1132d6d1796a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb)
ip_local_deliver_finish);
}
-static inline bool ip_rcv_options(struct sk_buff *skb)
+static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
{
struct ip_options *opt;
const struct iphdr *iph;
- struct net_device *dev = skb->dev;
/* It looks as overkill, because not all
IP options require packet mangling.
@@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
}
}
- if (ip_options_rcv_srr(skb))
+ if (ip_options_rcv_srr(skb, dev))
goto drop;
}
@@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
}
#endif
- if (iph->ihl > 5 && ip_rcv_options(skb))
+ if (iph->ihl > 5 && ip_rcv_options(skb, dev))
goto drop;
rt = skb_rtable(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 32a35043c9f5..3db31bb9df50 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb)
}
}
-int ip_options_rcv_srr(struct sk_buff *skb)
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
{
struct ip_options *opt = &(IPCB(skb)->opt);
int srrspace, srrptr;
@@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
orefdst = skb->_skb_refdst;
skb_dst_set(skb, NULL);
- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
+ err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index cd4814f7e962..359da68d7c06 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -67,11 +67,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
module_param(dctcp_alpha_on_init, uint, 0644);
MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
-module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
- "parameter for clamping alpha on loss");
-
static struct tcp_congestion_ops dctcp_reno;
static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
@@ -164,21 +159,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
}
}
-static void dctcp_state(struct sock *sk, u8 new_state)
+static void dctcp_react_to_loss(struct sock *sk)
{
- if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
- struct dctcp *ca = inet_csk_ca(sk);
+ struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- /* If this extension is enabled, we clamp dctcp_alpha to
- * max on packet loss; the motivation is that dctcp_alpha
- * is an indicator to the extend of congestion and packet
- * loss is an indicator of extreme congestion; setting
- * this in practice turned out to be beneficial, and
- * effectively assumes total congestion which reduces the
- * window by half.
- */
- ca->dctcp_alpha = DCTCP_MAX_ALPHA;
- }
+ ca->loss_cwnd = tp->snd_cwnd;
+ tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+ if (new_state == TCP_CA_Recovery &&
+ new_state != inet_csk(sk)->icsk_ca_state)
+ dctcp_react_to_loss(sk);
+ /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+ * one loss-adjustment per RTT.
+ */
}
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
@@ -190,6 +187,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
case CA_EVENT_ECN_NO_CE:
dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
break;
+ case CA_EVENT_LOSS:
+ dctcp_react_to_loss(sk);
+ break;
default:
/* Don't care for the rest. */
break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 277d71239d75..2f8039a26b08 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2578,7 +2578,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
{
int cpu;
- module_put(net->ipv4.tcp_congestion_control->owner);
+ if (net->ipv4.tcp_congestion_control)
+ module_put(net->ipv4.tcp_congestion_control->owner);
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 79d2e43c05c5..5fc1f4e0c0cf 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -417,6 +417,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
done:
rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
return ret;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b32c95f02128..655e46b227f9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
}
static int ip6erspan_rcv(struct sk_buff *skb,
- struct tnl_ptk_info *tpi)
+ struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct erspan_base_hdr *ershdr;
- struct erspan_metadata *pkt_md;
const struct ipv6hdr *ipv6h;
struct erspan_md2 *md2;
struct ip6_tnl *tunnel;
@@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, len)))
return PACKET_REJECT;
- ershdr = (struct erspan_base_hdr *)skb->data;
- pkt_md = (struct erspan_metadata *)(ershdr + 1);
-
if (__iptunnel_pull_header(skb, len,
htons(ETH_P_TEB),
false, false) < 0)
return PACKET_REJECT;
if (tunnel->parms.collect_md) {
+ struct erspan_metadata *pkt_md, *md;
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
- struct erspan_metadata *md;
+ unsigned char *gh;
__be64 tun_id;
__be16 flags;
@@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (!tun_dst)
return PACKET_REJECT;
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
info = &tun_dst->u.tun_info;
md = ip_tunnel_info_opts(info);
md->version = ver;
@@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD)
+ if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
goto out;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index edbd12067170..e51f3c648b09 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -601,7 +601,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len;
+ unsigned int mtu, hlen, left, len, nexthdr_offset;
int hroom, troom;
__be32 frag_id;
int ptr, offset = 0, err = 0;
@@ -612,6 +612,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
goto fail;
hlen = err;
nexthdr = *prevhdr;
+ nexthdr_offset = prevhdr - skb_network_header(skb);
mtu = ip6_skb_dst_mtu(skb);
@@ -646,6 +647,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
(err = skb_checksum_help(skb)))
goto fail;
+ prevhdr = skb_network_header(skb) + nexthdr_offset;
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0c6403cf8b52..ade1390c6348 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
- if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
+ if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
@@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else {
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
- skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+ skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
index 1059894a6f4c..4cb83fb69844 100644
--- a/net/ipv6/netfilter/ip6t_srh.c
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left + 1) * sizeof(struct in6_addr));
psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
+ if (!psid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
&srhinfo->psid_addr)))
@@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left - 1) * sizeof(struct in6_addr));
nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
+ if (!nsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
&srhinfo->nsid_addr)))
@@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (srhinfo->mt_flags & IP6T_SRH_LSID) {
lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
+ if (!lsid)
+ return false;
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
&srhinfo->lsid_addr)))
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4ef4bbdb49d4..0302e0eb07af 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1040,14 +1040,20 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
struct rt6_info *nrt;
if (!fib6_info_hold_safe(rt))
- return NULL;
+ goto fallback;
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
- if (nrt)
- ip6_rt_copy_init(nrt, rt);
- else
+ if (!nrt) {
fib6_info_release(rt);
+ goto fallback;
+ }
+ ip6_rt_copy_init(nrt, rt);
+ return nrt;
+
+fallback:
+ nrt = dev_net(dev)->ipv6.ip6_null_entry;
+ dst_hold(&nrt->dst);
return nrt;
}
@@ -1096,10 +1102,6 @@ restart:
dst_hold(&rt->dst);
} else {
rt = ip6_create_rt_rcu(f6i);
- if (!rt) {
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
- }
}
rcu_read_unlock();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 07e21a82ce4c..b2109b74857d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb)
!net_eq(tunnel->net, dev_net(tunnel->dev))))
goto out;
+ /* skb can be uncloned in iptunnel_pull_header, so
+ * old iph is no longer valid
+ */
+ iph = (const struct iphdr *)skb_mac_header(skb);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57ef69a10889..44d431849d39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1110,11 +1110,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = tcp_v6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
+ newnp->rcv_flowinfo = 0;
if (np->repflow)
- newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+ newnp->flow_label = 0;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index c5c5ab6c5a1c..44fdc641710d 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -2054,14 +2054,14 @@ static int __init kcm_init(void)
if (err)
goto fail;
- err = sock_register(&kcm_family_ops);
- if (err)
- goto sock_register_fail;
-
err = register_pernet_device(&kcm_net_ops);
if (err)
goto net_ops_fail;
+ err = sock_register(&kcm_family_ops);
+ if (err)
+ goto sock_register_fail;
+
err = kcm_proc_init();
if (err)
goto proc_init_fail;
@@ -2069,12 +2069,12 @@ static int __init kcm_init(void)
return 0;
proc_init_fail:
- unregister_pernet_device(&kcm_net_ops);
-
-net_ops_fail:
sock_unregister(PF_KCM);
sock_register_fail:
+ unregister_pernet_device(&kcm_net_ops);
+
+net_ops_fail:
proto_unregister(&kcm_proto);
fail:
@@ -2090,8 +2090,8 @@ fail:
static void __exit kcm_exit(void)
{
kcm_proc_exit();
- unregister_pernet_device(&kcm_net_ops);
sock_unregister(PF_KCM);
+ unregister_pernet_device(&kcm_net_ops);
proto_unregister(&kcm_proto);
destroy_workqueue(kcm_wq);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index dda8930f20e7..f3a8557494d6 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -140,9 +140,15 @@ static int mpls_xmit(struct sk_buff *skb)
if (rt)
err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
skb);
- else if (rt6)
- err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
- skb);
+ else if (rt6) {
+ if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
+ /* 6PE (RFC 4798) */
+ err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],
+ skb);
+ } else
+ err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
+ skb);
+ }
if (err)
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
__func__, err);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 5d782445d2fc..bad17bba8ba7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -251,6 +251,10 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
}
attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+ if (!attr) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
rc = ncsi_write_package_info(skb, ndp, package->id);
if (rc) {
nla_nest_cancel(skb, attr);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d43ffb09939b..6548271209a0 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1007,6 +1007,7 @@ config NETFILTER_XT_TARGET_TEE
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
+ depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index f067c6b50857..39fcc1ed18f3 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -20,9 +20,9 @@
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
-#include <net/route.h>
-#include <net/ip6_route.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -871,38 +871,33 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
} else if (sip_external_media) {
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(dev);
- struct rtable *rt;
- struct flowi4 fl4 = {};
-#if IS_ENABLED(CONFIG_IPV6)
- struct flowi6 fl6 = {};
-#endif
+ struct flowi fl;
struct dst_entry *dst = NULL;
+ memset(&fl, 0, sizeof(fl));
+
switch (nf_ct_l3num(ct)) {
case NFPROTO_IPV4:
- fl4.daddr = daddr->ip;
- rt = ip_route_output_key(net, &fl4);
- if (!IS_ERR(rt))
- dst = &rt->dst;
+ fl.u.ip4.daddr = daddr->ip;
+ nf_ip_route(net, &dst, &fl, false);
break;
-#if IS_ENABLED(CONFIG_IPV6)
case NFPROTO_IPV6:
- fl6.daddr = daddr->in6;
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- dst_release(dst);
- dst = NULL;
- }
+ fl.u.ip6.daddr = daddr->in6;
+ nf_ip6_route(net, &dst, &fl, false);
break;
-#endif
}
/* Don't predict any conntracks when media endpoint is reachable
* through the same interface as the signalling peer.
*/
- if (dst && dst->dev == dev)
- return NF_ACCEPT;
+ if (dst) {
+ bool external_media = (dst->dev == dev);
+
+ dst_release(dst);
+ if (external_media)
+ return NF_ACCEPT;
+ }
}
/* We need to check whether the registration exists before attempting
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 513f93118604..ef7772e976cc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2806,8 +2806,11 @@ err2:
nf_tables_rule_release(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
- if (info[i].ops != NULL)
+ if (info[i].ops) {
module_put(info[i].ops->type->owner);
+ if (info[i].ops->type->release_ops)
+ info[i].ops->type->release_ops(info[i].ops);
+ }
}
kvfree(info);
return err;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 457a9ceb46af..8dfa798ea683 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -65,21 +65,34 @@ nla_put_failure:
return -1;
}
-static void nft_objref_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_objref_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_object *obj = nft_objref_priv(expr);
+ if (phase == NFT_TRANS_COMMIT)
+ return;
+
obj->use--;
}
+static void nft_objref_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->use++;
+}
+
static struct nft_expr_type nft_objref_type;
static const struct nft_expr_ops nft_objref_ops = {
.type = &nft_objref_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
.eval = nft_objref_eval,
.init = nft_objref_init,
- .destroy = nft_objref_destroy,
+ .activate = nft_objref_activate,
+ .deactivate = nft_objref_deactivate,
.dump = nft_objref_dump,
};
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index f8092926f704..a340cd8a751b 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -233,5 +233,5 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fa61208371f8..321a0036fdf5 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -308,10 +308,6 @@ static void *nft_rbtree_deactivate(const struct net *net,
else if (d > 0)
parent = parent->rb_right;
else {
- if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
- continue;
- }
if (nft_rbtree_interval_end(rbe) &&
!nft_rbtree_interval_end(this)) {
parent = parent->rb_left;
@@ -320,6 +316,9 @@ static void *nft_rbtree_deactivate(const struct net *net,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
+ } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
}
nft_rbtree_flush(net, set, rbe);
return rbe;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 25eeb6d2a75a..f0ec068e1d02 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -366,7 +366,7 @@ int genl_register_family(struct genl_family *family)
start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
- goto errout_locked;
+ goto errout_free;
}
err = genl_validate_assign_mc_groups(family);
@@ -385,6 +385,7 @@ int genl_register_family(struct genl_family *family)
errout_remove:
idr_remove(&genl_fam_idr, family->id);
+errout_free:
kfree(family->attrbuf);
errout_locked:
genl_unlock_all();
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ae296273ce3d..17dcd0b5eb32 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -726,6 +726,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->service_name = kmemdup(addr->service_name,
llcp_sock->service_name_len,
GFP_KERNEL);
+ if (!llcp_sock->service_name) {
+ ret = -ENOMEM;
+ goto sock_llcp_release;
+ }
nfc_llcp_sock_link(&local->connecting_sockets, sk);
@@ -745,10 +749,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
return ret;
sock_unlink:
- nfc_llcp_put_ssap(local, llcp_sock->ssap);
-
nfc_llcp_sock_unlink(&local->connecting_sockets, sk);
+sock_llcp_release:
+ nfc_llcp_put_ssap(local, llcp_sock->ssap);
+
put_dev:
nfc_put_device(dev);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index ddfc52ac1f9b..c0d323b58e73 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe,
create_info = (struct nci_hci_create_pipe_resp *)skb->data;
dest_gate = create_info->dest_gate;
new_pipe = create_info->pipe;
+ if (new_pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
/* Save the new created pipe and bind with local gate,
* the description for skb->data[3] is destination gate id
@@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe,
goto exit;
}
delete_info = (struct nci_hci_delete_pipe_noti *)skb->data;
+ if (delete_info->pipe >= NCI_HCI_MAX_PIPES) {
+ status = NCI_HCI_ANY_E_NOK;
+ goto exit;
+ }
ndev->hci_dev->pipes[delete_info->pipe].gate =
NCI_HCI_INVALID_GATE;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6679e96ab1dc..9dd158ab51b3 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -448,6 +448,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
0, upcall_info->cmd);
+ if (!upcall) {
+ err = -EINVAL;
+ goto out;
+ }
upcall->dp_ifindex = dp_ifindex;
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
@@ -460,6 +464,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+ if (!nla) {
+ err = -EMSGSIZE;
+ goto out;
+ }
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
BUG_ON(err);
@@ -468,6 +476,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->actions_len) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+ if (!nla) {
+ err = -EMSGSIZE;
+ goto out;
+ }
err = ovs_nla_put_actions(upcall_info->actions,
upcall_info->actions_len,
user_skb);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 691da853bef5..4bdf5e3ac208 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
struct sw_flow_actions *acts;
int new_acts_size;
- int req_size = NLA_ALIGN(attr_len);
+ size_t req_size = NLA_ALIGN(attr_len);
int next_offset = offsetof(struct sw_flow_actions, actions) +
(*sfa)->actions_len;
if (req_size <= (ksize(*sfa) - next_offset))
goto out;
- new_acts_size = ksize(*sfa) * 2;
+ new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8376bc1c1508..9419c5cf4de5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1852,7 +1852,8 @@ oom:
static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
- if (!skb->protocol && sock->type == SOCK_RAW) {
+ if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
+ sock->type == SOCK_RAW) {
skb_reset_mac_header(skb);
skb->protocol = dev_parse_header_protocol(skb);
}
@@ -3243,7 +3244,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
}
mutex_lock(&net->packet.sklist_lock);
- sk_add_node_rcu(sk, &net->packet.sklist);
+ sk_add_node_tail_rcu(sk, &net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
@@ -4209,7 +4210,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
struct pgv *pg_vec;
int i;
- pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
+ pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!pg_vec))
goto out;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index fd2694174607..faf726e00e27 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -608,7 +608,7 @@ static void rds_tcp_kill_sock(struct net *net)
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
- if (net != c_net || !tc->t_sock)
+ if (net != c_net)
continue;
if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
list_move_tail(&tc->t_tcp_node, &tmp_list);
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 7ca57741b2fb..7849f286bb93 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype)
struct sk_buff *skb;
unsigned char *dptr;
unsigned char lci1, lci2;
- char buffer[100];
- int len, faclen = 0;
+ int maxfaclen = 0;
+ int len, faclen;
+ int reserve;
- len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1;
+ reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1;
+ len = ROSE_MIN_LEN;
switch (frametype) {
case ROSE_CALL_REQUEST:
len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN;
- faclen = rose_create_facilities(buffer, rose);
- len += faclen;
+ maxfaclen = 256;
break;
case ROSE_CALL_ACCEPTED:
case ROSE_CLEAR_REQUEST:
@@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype)
break;
}
- if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+ skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC);
+ if (!skb)
return;
/*
* Space for AX.25 header and PID.
*/
- skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1);
+ skb_reserve(skb, reserve);
- dptr = skb_put(skb, skb_tailroom(skb));
+ dptr = skb_put(skb, len);
lci1 = (rose->lci >> 8) & 0x0F;
lci2 = (rose->lci >> 0) & 0xFF;
@@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype)
dptr += ROSE_ADDR_LEN;
memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
dptr += ROSE_ADDR_LEN;
- memcpy(dptr, buffer, faclen);
+ faclen = rose_create_facilities(dptr, rose);
+ skb_put(skb, faclen);
dptr += faclen;
break;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 736aa9281100..004c762c2e8d 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -335,7 +335,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
struct kvec iov[2];
rxrpc_serial_t serial;
size_t len;
- bool lost = false;
int ret, opt;
_enter(",{%d}", skb->len);
@@ -393,14 +392,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
static int lose;
if ((lose++ & 7) == 7) {
ret = 0;
- lost = true;
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
+ whdr.flags, retrans, true);
+ goto done;
}
}
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
- retrans, lost);
- if (lost)
- goto done;
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans,
+ false);
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1b9afdee5ba9..5c02ad97ef23 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -358,8 +358,7 @@ config NET_SCH_PIE
help
Say Y here if you want to use the Proportional Integral controller
Enhanced scheduler packet scheduling algorithm.
- For more information, please see
- http://tools.ietf.org/html/draft-pan-tsvwg-pie-00
+ For more information, please see https://tools.ietf.org/html/rfc8033
To compile this driver as a module, choose M here: the module
will be called sch_pie.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index aecf1bf233c8..5a87e271d35a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,27 +28,10 @@
#include <net/act_api.h>
#include <net/netlink.h>
-static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
-{
- u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
-
- if (!tp)
- return -EINVAL;
- a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
- if (!a->goto_chain)
- return -ENOMEM;
- return 0;
-}
-
-static void tcf_action_goto_chain_fini(struct tc_action *a)
-{
- tcf_chain_put_by_act(a->goto_chain);
-}
-
static void tcf_action_goto_chain_exec(const struct tc_action *a,
struct tcf_result *res)
{
- const struct tcf_chain *chain = a->goto_chain;
+ const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
@@ -71,6 +54,51 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
call_rcu(&old->rcu, tcf_free_cookie_rcu);
}
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+ struct tcf_chain **newchain,
+ struct netlink_ext_ack *extack)
+{
+ int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
+ u32 chain_index;
+
+ if (!opcode)
+ ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
+ else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
+ ret = 0;
+ if (ret) {
+ NL_SET_ERR_MSG(extack, "invalid control action");
+ goto end;
+ }
+
+ if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
+ chain_index = action & TC_ACT_EXT_VAL_MASK;
+ if (!tp || !newchain) {
+ ret = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "can't goto NULL proto/chain");
+ goto end;
+ }
+ *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
+ if (!*newchain) {
+ ret = -ENOMEM;
+ NL_SET_ERR_MSG(extack,
+ "can't allocate goto_chain");
+ }
+ }
+end:
+ return ret;
+}
+EXPORT_SYMBOL(tcf_action_check_ctrlact);
+
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+ struct tcf_chain *goto_chain)
+{
+ a->tcfa_action = action;
+ rcu_swap_protected(a->goto_chain, goto_chain, 1);
+ return goto_chain;
+}
+EXPORT_SYMBOL(tcf_action_set_ctrlact);
+
/* XXX: For standalone actions, we don't need a RCU grace period either, because
* actions are always connected to filters and filters are already destroyed in
* RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -78,13 +106,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
*/
static void free_tcf(struct tc_action *p)
{
+ struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
+
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_bstats_hw);
free_percpu(p->cpu_qstats);
tcf_set_action_cookie(&p->act_cookie, NULL);
- if (p->goto_chain)
- tcf_action_goto_chain_fini(p);
+ if (chain)
+ tcf_chain_put_by_act(chain);
kfree(p);
}
@@ -654,6 +684,10 @@ repeat:
return TC_ACT_OK;
}
} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+ if (unlikely(!rcu_access_pointer(a->goto_chain))) {
+ net_warn_ratelimited("can't go to NULL chain!\n");
+ return TC_ACT_SHOT;
+ }
tcf_action_goto_chain_exec(a, res);
}
@@ -800,15 +834,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
-static bool tcf_action_valid(int action)
-{
- int opcode = TC_ACT_EXT_OPCODE(action);
-
- if (!opcode)
- return action <= TC_ACT_VALUE_MAX;
- return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
-}
-
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -890,10 +915,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, extack);
+ rtnl_held, tp, extack);
else
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- extack);
+ tp, extack);
if (err < 0)
goto err_mod;
@@ -907,18 +932,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (err != ACT_P_CREATED)
module_put(a_o->owner);
- if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
- err = tcf_action_goto_chain_init(a, tp);
- if (err) {
- tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
- return ERR_PTR(err);
- }
- }
-
- if (!tcf_action_valid(a->tcfa_action)) {
+ if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+ !rcu_access_pointer(a->goto_chain)) {
tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "Invalid control action value");
+ NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
return ERR_PTR(-EINVAL);
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index aa5c38d11a30..3841156aa09f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
@@ -278,10 +279,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
int replace, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_bpf_cfg cfg, old;
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
@@ -323,12 +325,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return ret;
}
+ ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (ret < 0)
+ goto release_idr;
+
is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
is_ebpf = tb[TCA_ACT_BPF_FD];
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
ret = -EINVAL;
- goto out;
+ goto put_chain;
}
memset(&cfg, 0, sizeof(cfg));
@@ -336,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
tcf_bpf_init_from_efd(tb, &cfg);
if (ret < 0)
- goto out;
+ goto put_chain;
prog = to_bpf(*act);
@@ -350,10 +356,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (cfg.bpf_num_ops)
prog->bpf_num_ops = cfg.bpf_num_ops;
- prog->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch);
rcu_assign_pointer(prog->filter, cfg.filter);
spin_unlock_bh(&prog->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
if (res == ACT_P_CREATED) {
tcf_idr_insert(tn, *act);
} else {
@@ -363,9 +372,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
}
return res;
-out:
- tcf_idr_release(*act, bind);
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+release_idr:
+ tcf_idr_release(*act, bind);
return ret;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 5d24993cccfe..32ae0cd6e31c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -21,6 +21,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_connmark.h>
#include <net/tc_act/tc_connmark.h>
@@ -97,13 +98,15 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
- int ret = 0;
+ int ret = 0, err;
if (!nla)
return -EINVAL;
@@ -128,7 +131,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
}
ci = to_connmark(*a);
- ci->tcf_action = parm->action;
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
+ tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->net = net;
ci->zone = parm->zone;
@@ -142,15 +149,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
/* replacing action and zone */
spin_lock_bh(&ci->tcf_lock);
- ci->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->zone = parm->zone;
spin_unlock_bh(&ci->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
ret = 0;
}
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index c79aca29505e..0c77e7bdf6d5 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -33,6 +33,7 @@
#include <net/sctp/checksum.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
@@ -46,12 +47,13 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_csum *parm;
struct tcf_csum *p;
int ret = 0, err;
@@ -87,21 +89,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
p = to_tcf_csum(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
params_new->update_flags = parm->update_flags;
spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(p->params, params_new,
lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (params_new)
kfree_rcu(params_new, rcu);
@@ -109,6 +117,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
/**
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 93da0004e9f4..e540e31069d7 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
@@ -57,10 +58,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_gact *parm;
struct tcf_gact *gact;
int ret = 0;
@@ -116,10 +118,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
gact = to_gact(*a);
spin_lock_bh(&gact->tcf_lock);
- gact->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
@@ -133,9 +138,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
spin_unlock_bh(&gact->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 9b1f2b3990ee..31c6ffb6abe7 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -29,6 +29,7 @@
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
@@ -469,11 +470,12 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_ife_params *p;
struct tcf_ife_info *ife;
u16 ife_type = ETH_P_IFE;
@@ -531,6 +533,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
p->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
@@ -563,13 +569,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
NULL, NULL);
- if (err) {
-metadata_parse_err:
- tcf_idr_release(*a, bind);
- kfree(p);
- return err;
- }
-
+ if (err)
+ goto metadata_parse_err;
err = populate_metalist(ife, tb2, exists, rtnl_held);
if (err)
goto metadata_parse_err;
@@ -581,21 +582,20 @@ metadata_parse_err:
* going to bail out
*/
err = use_all_metadata(ife, exists);
- if (err) {
- tcf_idr_release(*a, bind);
- kfree(p);
- return err;
- }
+ if (err)
+ goto metadata_parse_err;
}
if (exists)
spin_lock_bh(&ife->tcf_lock);
- ife->tcf_action = parm->action;
/* protected by tcf_lock when modifying existing action */
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(ife->params, p, 1);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (p)
kfree_rcu(p, rcu);
@@ -603,6 +603,13 @@ metadata_parse_err:
tcf_idr_insert(tn, *a);
return ret;
+metadata_parse_err:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ kfree(p);
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 98f5b6ea77b4..04a0b5c61194 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops, int ovr, int bind)
+ const struct tc_action_ops *ops, int ovr, int bind,
+ struct tcf_proto *tp)
{
struct tc_action_net *tn = net_generic(net, id);
struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -205,20 +206,20 @@ err1:
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind);
+ bind, tp);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool unlocked,
+ int bind, bool unlocked, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind);
+ bind, tp);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6692fd054617..17cc6bd4c57c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -94,10 +94,12 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
bool mac_header_xmit = false;
struct tc_mirred *parm;
struct tcf_mirred *m;
@@ -157,18 +159,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+
m = to_mirred(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&m->tcfm_list);
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
spin_lock_bh(&m->tcf_lock);
- m->tcf_action = parm->action;
- m->tcfm_eaction = parm->eaction;
if (parm->ifindex) {
dev = dev_get_by_index(net, parm->ifindex);
if (!dev) {
spin_unlock_bh(&m->tcf_lock);
- tcf_idr_release(*a, bind);
- return -ENODEV;
+ err = -ENODEV;
+ goto put_chain;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
rcu_swap_protected(m->tcfm_dev, dev,
@@ -177,7 +184,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev_put(dev);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ m->tcfm_eaction = parm->eaction;
spin_unlock_bh(&m->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED) {
spin_lock(&mirred_list_lock);
@@ -188,6 +199,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 543eab9193f1..e91bb8eb81ec 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
@@ -38,10 +39,12 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action **a, int ovr, int bind,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ bool rtnl_held, struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
@@ -76,6 +79,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
} else {
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
@@ -84,13 +90,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
p->mask = parm->mask;
p->flags = parm->flags;
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a80373878df7..287793abfaf9 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -23,6 +23,7 @@
#include <linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/pkt_cls.h>
static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
@@ -138,10 +139,11 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
struct tcf_pedit_key_ex *keys_ex;
struct tc_pedit *parm;
@@ -205,6 +207,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+ goto out_release;
+ }
p = to_pedit(*a);
spin_lock_bh(&p->tcf_lock);
@@ -214,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!keys) {
spin_unlock_bh(&p->tcf_lock);
ret = -ENOMEM;
- goto out_release;
+ goto put_chain;
}
kfree(p->tcfp_keys);
p->tcfp_keys = keys;
@@ -223,16 +230,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
p->tcfp_flags = parm->flags;
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
kfree(p->tcfp_keys_ex);
p->tcfp_keys_ex = keys_ex;
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
out_release:
tcf_idr_release(*a, bind);
out_free:
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8271a6263824..2b8581f6ab51 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <net/act_api.h>
#include <net/netlink.h>
+#include <net/pkt_cls.h>
struct tcf_police_params {
int tcfp_result;
@@ -83,10 +84,12 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
struct nlattr *tb[TCA_POLICE_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
@@ -128,6 +131,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
police = to_police(*a);
if (parm->rate.rate) {
@@ -213,12 +219,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (new->peak_present)
police->tcfp_ptoks = new->tcfp_mtu_ptoks;
spin_unlock_bh(&police->tcfp_lock);
- police->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(police->params,
new,
lockdep_is_held(&police->tcf_lock));
spin_unlock_bh(&police->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (new)
kfree_rcu(new, rcu);
@@ -229,6 +237,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
tcf_idr_release(*a, bind);
return err;
}
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 203e399e5c85..0f82d50ea232 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -22,6 +22,7 @@
#include <linux/tc_act/tc_sample.h>
#include <net/tc_act/tc_sample.h>
#include <net/psample.h>
+#include <net/pkt_cls.h>
#include <linux/if_arp.h>
@@ -37,14 +38,15 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
+ struct tcf_chain *goto_ch = NULL;
+ u32 psample_group_num, rate;
struct tc_sample *parm;
- u32 psample_group_num;
struct tcf_sample *s;
bool exists = false;
int ret, err;
@@ -79,19 +81,28 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+ rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ if (!rate) {
+ NL_SET_ERR_MSG(extack, "invalid sample rate");
+ err = -EINVAL;
+ goto put_chain;
+ }
psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
psample_group = psample_group_get(net, psample_group_num);
if (!psample_group) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
s = to_sample(*a);
spin_lock_bh(&s->tcf_lock);
- s->tcf_action = parm->action;
- s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ s->rate = rate;
s->psample_group_num = psample_group_num;
RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -100,10 +111,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
}
spin_unlock_bh(&s->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_sample_cleanup(struct tc_action *a)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d54cb608dbaf..23c8ca5615e5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -18,6 +18,7 @@
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -60,14 +61,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata)
return 0;
}
-static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
- struct tc_defact *p)
+static int reset_policy(struct tc_action *a, const struct nlattr *defdata,
+ struct tc_defact *p, struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
+ struct tcf_chain *goto_ch = NULL;
+ struct tcf_defact *d;
+ int err;
+
+ err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack);
+ if (err < 0)
+ return err;
+ d = to_defact(a);
spin_lock_bh(&d->tcf_lock);
- d->tcf_action = p->action;
+ goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch);
memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
spin_unlock_bh(&d->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+ return 0;
}
static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
@@ -78,10 +91,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_defact *parm;
struct tcf_defact *d;
bool exists = false;
@@ -122,27 +136,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
d = to_defact(*a);
- ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
- if (ret < 0) {
- tcf_idr_release(*a, bind);
- return ret;
- }
- d->tcf_action = parm->action;
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
+
+ err = alloc_defdata(d, tb[TCA_DEF_DATA]);
+ if (err < 0)
+ goto put_chain;
+
+ tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ret = ACT_P_CREATED;
} else {
- d = to_defact(*a);
-
if (!ovr) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ err = -EEXIST;
+ goto release_idr;
}
- reset_policy(d, tb[TCA_DEF_DATA], parm);
+ err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack);
+ if (err)
+ goto release_idr;
}
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 65879500b688..7e1d261a31d2 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -26,6 +26,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/dsfield.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
@@ -96,11 +97,13 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_skbedit *parm;
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
@@ -186,11 +189,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
params_new->flags = flags;
@@ -208,16 +214,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new->mask = *mask;
spin_lock_bh(&d->tcf_lock);
- d->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(d->params, params_new,
lockdep_is_held(&d->tcf_lock));
spin_unlock_bh(&d->tcf_lock);
if (params_new)
kfree_rcu(params_new, rcu);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 7bac1d78e7a3..1d4c324d0a42 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -16,6 +16,7 @@
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
@@ -82,11 +83,13 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
struct tcf_skbmod_params *p, *p_old;
+ struct tcf_chain *goto_ch = NULL;
struct tc_skbmod *parm;
struct tcf_skbmod *d;
bool exists = false;
@@ -153,21 +156,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
d = to_skbmod(*a);
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
p->flags = lflags;
- d->tcf_action = parm->action;
if (ovr)
spin_lock_bh(&d->tcf_lock);
/* Protected by tcf_lock if overwriting existing action. */
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
p_old = rcu_dereference_protected(d->skbmod_p, 1);
if (lflags & SKBMOD_F_DMAC)
@@ -183,10 +189,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (p_old)
kfree_rcu(p_old, rcu);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_skbmod_cleanup(struct tc_action *a)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 7c6591b991d5..d5aaf90a3971 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
@@ -210,12 +211,14 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
+ struct tcf_chain *goto_ch = NULL;
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
@@ -359,6 +362,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
goto release_tun_meta;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+ exists = true;
+ goto release_tun_meta;
+ }
t = to_tunnel_key(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
@@ -366,23 +375,29 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
ret = -ENOMEM;
exists = true;
- goto release_tun_meta;
+ goto put_chain;
}
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
spin_lock_bh(&t->tcf_lock);
- t->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(t->params, params_new,
lockdep_is_held(&t->tcf_lock));
spin_unlock_bh(&t->tcf_lock);
tunnel_key_release_params(params_new);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
release_tun_meta:
if (metadata)
dst_release(&metadata->dst);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ac0061599225..0f40d0a74423 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -15,6 +15,7 @@
#include <linux/if_vlan.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
@@ -105,10 +106,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_vlan_params *p;
struct tc_vlan *parm;
struct tcf_vlan *v;
@@ -200,12 +202,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
v = to_vlan(*a);
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
p->tcfv_action = action;
@@ -214,16 +220,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
p->tcfv_push_proto = push_proto;
spin_lock_bh(&v->tcf_lock);
- v->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
spin_unlock_bh(&v->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (p)
kfree_rcu(p, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_vlan_cleanup(struct tc_action *a)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dc10525e90e7..99ae30c177c7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -367,7 +367,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
struct tcf_block *block = chain->block;
mutex_destroy(&chain->filter_chain_lock);
- kfree(chain);
+ kfree_rcu(chain, rcu);
if (free_block)
tcf_block_destroy(block);
}
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 459921bd3d87..a13bc351a414 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -130,6 +130,11 @@ static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
static void *mall_get(struct tcf_proto *tp, u32 handle)
{
+ struct cls_mall_head *head = rtnl_dereference(tp->root);
+
+ if (head && head->handle == handle)
+ return head;
+
return NULL;
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 1d2a12132abc..259d97bc2abd 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -211,6 +211,9 @@ struct cake_sched_data {
u8 ack_filter;
u8 atm_mode;
+ u32 fwmark_mask;
+ u16 fwmark_shft;
+
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
u16 rate_shft;
ktime_t time_next_packet;
@@ -258,8 +261,7 @@ enum {
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
CAKE_FLAG_INGRESS = BIT(2),
CAKE_FLAG_WASH = BIT(3),
- CAKE_FLAG_SPLIT_GSO = BIT(4),
- CAKE_FLAG_FWMARK = BIT(5)
+ CAKE_FLAG_SPLIT_GSO = BIT(4)
};
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@@ -1515,16 +1517,27 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{
+ int wlen = skb_network_offset(skb);
u8 dscp;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
if (wash && dscp)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
return dscp;
case htons(ETH_P_IPV6):
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
if (wash && dscp)
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
@@ -1543,7 +1556,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
struct sk_buff *skb)
{
struct cake_sched_data *q = qdisc_priv(sch);
- u32 tin;
+ u32 tin, mark;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
@@ -1551,14 +1564,13 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
*/
dscp = cake_handle_diffserv(skb,
q->rate_flags & CAKE_FLAG_WASH);
+ mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
- else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */
- skb->mark &&
- skb->mark <= q->tin_cnt)
- tin = q->tin_order[skb->mark - 1];
+ else if (mark && mark <= q->tin_cnt)
+ tin = q->tin_order[mark - 1];
else if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 &&
@@ -2172,6 +2184,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
[TCA_CAKE_MPU] = { .type = NLA_U32 },
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
+ [TCA_CAKE_FWMARK] = { .type = NLA_U32 },
};
static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
@@ -2619,10 +2632,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
}
if (tb[TCA_CAKE_FWMARK]) {
- if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
- q->rate_flags |= CAKE_FLAG_FWMARK;
- else
- q->rate_flags &= ~CAKE_FLAG_FWMARK;
+ q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
+ q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
}
if (q->tins) {
@@ -2784,8 +2795,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FWMARK,
- !!(q->rate_flags & CAKE_FLAG_FWMARK)))
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4dc05409e3fb..114b9048ea7e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
+ __u32 qlen;
cl->xstats.avgidle = cl->avgidle;
cl->xstats.undertime = 0;
+ qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
@@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
@@ -1665,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- unsigned int qlen, backlog;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
sch_tree_lock(sch);
- qlen = cl->q->q.qlen;
- backlog = cl->q->qstats.backlog;
- qdisc_reset(cl->q);
- qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
+ qdisc_purge_queue(cl->q);
if (cl->next_alive)
cbq_deactivate_class(cl);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 09b800991065..430df9a55ec4 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
return container_of(clc, struct drr_class, common);
}
-static void drr_purge_queue(struct drr_class *cl)
-{
- unsigned int len = cl->qdisc->q.qlen;
- unsigned int backlog = cl->qdisc->qstats.backlog;
-
- qdisc_reset(cl->qdisc);
- qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
[TCA_DRR_QUANTUM] = { .type = NLA_U32 },
};
@@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- drr_purge_queue(cl);
+ qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
sch_tree_unlock(sch);
@@ -269,7 +260,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct gnet_dump *d)
{
struct drr_class *cl = (struct drr_class *)arg;
- __u32 qlen = cl->qdisc->q.qlen;
+ __u32 qlen = qdisc_qlen_sum(cl->qdisc);
+ struct Qdisc *cl_q = cl->qdisc;
struct tc_drr_stats xstats;
memset(&xstats, 0, sizeof(xstats));
@@ -279,7 +271,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
+ gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 24cc220a3218..d2ab463f22ae 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -845,16 +845,6 @@ qdisc_peek_len(struct Qdisc *sch)
}
static void
-hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
-{
- unsigned int len = cl->qdisc->q.qlen;
- unsigned int backlog = cl->qdisc->qstats.backlog;
-
- qdisc_reset(cl->qdisc);
- qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
-static void
hfsc_adjust_levels(struct hfsc_class *cl)
{
struct hfsc_class *p;
@@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
list_add_tail(&cl->siblings, &parent->children);
if (parent->level == 0)
- hfsc_purge_queue(sch, parent);
+ qdisc_purge_queue(parent->qdisc);
hfsc_adjust_levels(parent);
sch_tree_unlock(sch);
@@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
list_del(&cl->siblings);
hfsc_adjust_levels(cl->cl_parent);
- hfsc_purge_queue(sch, cl);
+ qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
sch_tree_unlock(sch);
@@ -1328,8 +1318,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
struct tc_hfsc_stats xstats;
+ __u32 qlen;
- cl->qstats.backlog = cl->qdisc->qstats.backlog;
+ qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog);
xstats.level = cl->level;
xstats.period = cl->cl_vtperiod;
xstats.work = cl->cl_total;
@@ -1337,7 +1328,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 30f9da7e1076..2f9883b196e8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
};
__u32 qlen = 0;
- if (!cl->level && cl->leaf.q) {
- qlen = cl->leaf.q->q.qlen;
- qs.backlog = cl->leaf.q->qstats.backlog;
- }
+ if (!cl->level && cl->leaf.q)
+ qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
+
cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
INT_MIN, INT_MAX);
cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
@@ -1270,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- if (!cl->level) {
- unsigned int qlen = cl->leaf.q->q.qlen;
- unsigned int backlog = cl->leaf.q->qstats.backlog;
-
- qdisc_reset(cl->leaf.q);
- qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog);
- }
+ if (!cl->level)
+ qdisc_purge_queue(cl->leaf.q);
/* delete from hash and active; remainder in destroy_class */
qdisc_class_hash_remove(&q->clhash, &cl->common);
@@ -1404,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
classid, NULL);
sch_tree_lock(sch);
if (parent && !parent->level) {
- unsigned int qlen = parent->leaf.q->q.qlen;
- unsigned int backlog = parent->leaf.q->qstats.backlog;
-
/* turn parent into inner node */
- qdisc_reset(parent->leaf.q);
- qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog);
+ qdisc_purge_queue(parent->leaf.q);
qdisc_put(parent->leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 203659bc3906..3a3312467692 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+ qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index d364e63c396d..ea0dc112b38d 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL,
- &sch->qstats, sch->q.qlen) < 0)
+ qdisc_qstats_copy(d, sch) < 0)
return -1;
}
return 0;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 7410ce4d0321..35b03ae08e0f 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
for (i = q->bands; i < q->max_bands; i++) {
if (q->queues[i] != &noop_qdisc) {
struct Qdisc *child = q->queues[i];
+
q->queues[i] = &noop_qdisc;
- qdisc_tree_reduce_backlog(child, child->q.qlen,
- child->qstats.backlog);
+ qdisc_tree_flush_backlog(child);
qdisc_put(child);
}
}
@@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_hash_add(child, true);
if (old != &noop_qdisc) {
- qdisc_tree_reduce_backlog(old,
- old->q.qlen,
- old->qstats.backlog);
+ qdisc_tree_flush_backlog(old);
qdisc_put(old);
}
sch_tree_unlock(sch);
@@ -344,7 +342,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
cl_q = q->queues[cl - 1];
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl_q->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+ qdisc_qstats_copy(d, cl_q) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 847141cd900f..d519b21535b3 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
- for (i = q->bands; i < oldbands; i++) {
- struct Qdisc *child = q->queues[i];
-
- qdisc_tree_reduce_backlog(child, child->q.qlen,
- child->qstats.backlog);
- }
+ for (i = q->bands; i < oldbands; i++)
+ qdisc_tree_flush_backlog(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
@@ -365,7 +361,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
cl_q = q->queues[cl - 1];
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl_q->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+ qdisc_qstats_copy(d, cl_q) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 29f5c4a24688..1589364b54da 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
return container_of(clc, struct qfq_class, common);
}
-static void qfq_purge_queue(struct qfq_class *cl)
-{
- unsigned int len = cl->qdisc->q.qlen;
- unsigned int backlog = cl->qdisc->qstats.backlog;
-
- qdisc_reset(cl->qdisc);
- qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
[TCA_QFQ_LMAX] = { .type = NLA_U32 },
@@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- qfq_purge_queue(cl);
+ qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
sch_tree_unlock(sch);
@@ -655,8 +646,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL,
- &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
+ qdisc_qstats_copy(d, cl->qdisc) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9df9942340ea..4e8c0abf6194 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
q->flags = ctl->flags;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
- q->qdisc->qstats.backlog);
+ qdisc_tree_flush_backlog(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index bab506b01a32..2419fdb75966 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_hash_add(child, true);
sch_tree_lock(sch);
- qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
- q->qdisc->qstats.backlog);
+ qdisc_tree_flush_backlog(q->qdisc);
qdisc_put(q->qdisc);
q->qdisc = child;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 206e4dbed12f..c7041999eb5d 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+ qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7f272a9070c5..f71578dbb9e3 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
- q->qdisc->qstats.backlog);
+ qdisc_tree_flush_backlog(q->qdisc);
qdisc_put(q->qdisc);
q->qdisc = child;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6abc8b274270..951afdeea5e9 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -600,6 +600,7 @@ out:
static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
{
/* No address mapping for V4 sockets */
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
return sizeof(struct sockaddr_in);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6140471efd4b..9874e60c9b0d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -999,7 +999,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = vmemdup_user(addrs, addrs_size);
+ kaddrs = memdup_user(addrs, addrs_size);
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
@@ -1007,7 +1007,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
addr_buf = kaddrs;
while (walk_size < addrs_size) {
if (walk_size + sizeof(sa_family_t) > addrs_size) {
- kvfree(kaddrs);
+ kfree(kaddrs);
return -EINVAL;
}
@@ -1018,7 +1018,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
* causes the address buffer to overflow return EINVAL.
*/
if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- kvfree(kaddrs);
+ kfree(kaddrs);
return -EINVAL;
}
addrcnt++;
@@ -1054,7 +1054,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
}
out:
- kvfree(kaddrs);
+ kfree(kaddrs);
return err;
}
@@ -1329,7 +1329,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = vmemdup_user(addrs, addrs_size);
+ kaddrs = memdup_user(addrs, addrs_size);
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
@@ -1349,7 +1349,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
out_free:
- kvfree(kaddrs);
+ kfree(kaddrs);
return err;
}
@@ -2920,6 +2920,9 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ params.sack_assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.sack_assoc_id == SCTP_FUTURE_ASSOC ||
params.sack_assoc_id == SCTP_ALL_ASSOC) {
if (params.sack_delay) {
@@ -3024,6 +3027,9 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ info.sinfo_assoc_id = SCTP_FUTURE_ASSOC;
+
if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC ||
info.sinfo_assoc_id == SCTP_ALL_ASSOC) {
sp->default_stream = info.sinfo_stream;
@@ -3081,6 +3087,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ info.snd_assoc_id = SCTP_FUTURE_ASSOC;
+
if (info.snd_assoc_id == SCTP_FUTURE_ASSOC ||
info.snd_assoc_id == SCTP_ALL_ASSOC) {
sp->default_stream = info.snd_sid;
@@ -3531,6 +3540,9 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
return 0;
}
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
sp->default_rcv_context = params.assoc_value;
@@ -3670,6 +3682,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
return 0;
}
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
sp->max_burst = params.assoc_value;
@@ -3798,6 +3813,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
goto out;
}
+ if (sctp_style(sk, TCP))
+ authkey->sca_assoc_id = SCTP_FUTURE_ASSOC;
+
if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC ||
authkey->sca_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_set_key(ep, asoc, authkey);
@@ -3853,6 +3871,9 @@ static int sctp_setsockopt_active_key(struct sock *sk,
if (asoc)
return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+ if (sctp_style(sk, TCP))
+ val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
val.scact_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
@@ -3904,6 +3925,9 @@ static int sctp_setsockopt_del_key(struct sock *sk,
if (asoc)
return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
+ if (sctp_style(sk, TCP))
+ val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
val.scact_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
@@ -3954,6 +3978,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
if (asoc)
return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+ if (sctp_style(sk, TCP))
+ val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+
if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
val.scact_assoc_id == SCTP_ALL_ASSOC) {
ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
@@ -4169,6 +4196,9 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk,
goto out;
}
+ if (sctp_style(sk, TCP))
+ info.pr_assoc_id = SCTP_FUTURE_ASSOC;
+
if (info.pr_assoc_id == SCTP_FUTURE_ASSOC ||
info.pr_assoc_id == SCTP_ALL_ASSOC) {
SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
@@ -4251,6 +4281,9 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk,
goto out;
}
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
ep->strreset_enable = params.assoc_value;
@@ -4376,6 +4409,9 @@ static int sctp_setsockopt_scheduler(struct sock *sk,
if (asoc)
return sctp_sched_set_sched(asoc, params.assoc_value);
+ if (sctp_style(sk, TCP))
+ params.assoc_id = SCTP_FUTURE_ASSOC;
+
if (params.assoc_id == SCTP_FUTURE_ASSOC ||
params.assoc_id == SCTP_ALL_ASSOC)
sp->default_ss = params.assoc_value;
@@ -4541,6 +4577,9 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
if (asoc)
return sctp_assoc_ulpevent_type_set(&param, asoc);
+ if (sctp_style(sk, TCP))
+ param.se_assoc_id = SCTP_FUTURE_ASSOC;
+
if (param.se_assoc_id == SCTP_FUTURE_ASSOC ||
param.se_assoc_id == SCTP_ALL_ASSOC)
sctp_ulpevent_type_set(&sp->subscribe,
@@ -9169,7 +9208,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
{
int ancestor_size = sizeof(struct inet_sock) +
sizeof(struct sctp_sock) -
- offsetof(struct sctp_sock, auto_asconf_list);
+ offsetof(struct sctp_sock, pd_lobby);
if (sk_from->sk_family == PF_INET6)
ancestor_size += sizeof(struct ipv6_pinfo);
@@ -9253,7 +9292,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby.
* 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
*/
- skb_queue_head_init(&newsp->pd_lobby);
atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
diff --git a/net/socket.c b/net/socket.c
index 3c176a12fe48..8255f5bda0aa 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = {
* but we take care of internal coherence yet.
*/
+/**
+ * sock_alloc_file - Bind a &socket to a &file
+ * @sock: socket
+ * @flags: file status flags
+ * @dname: protocol name
+ *
+ * Returns the &file bound with @sock, implicitly storing it
+ * in sock->file. If dname is %NULL, sets to "".
+ * On failure the return is a ERR pointer (see linux/err.h).
+ * This function uses GFP_KERNEL internally.
+ */
+
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct file *file;
@@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags)
return PTR_ERR(newfile);
}
+/**
+ * sock_from_file - Return the &socket bounded to @file.
+ * @file: file
+ * @err: pointer to an error code return
+ *
+ * On failure returns %NULL and assigns -ENOTSOCK to @err.
+ */
+
struct socket *sock_from_file(struct file *file, int *err)
{
if (file->f_op == &socket_file_ops)
@@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = {
};
/**
- * sock_alloc - allocate a socket
+ * sock_alloc - allocate a socket
*
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
- * NULL is returned.
+ * NULL is returned. This functions uses GFP_KERNEL internally.
*/
struct socket *sock_alloc(void)
@@ -561,7 +581,7 @@ struct socket *sock_alloc(void)
EXPORT_SYMBOL(sock_alloc);
/**
- * sock_release - close a socket
+ * sock_release - close a socket
* @sock: socket to close
*
* The socket is released from the protocol stack if it has a release
@@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
}
EXPORT_SYMBOL(__sock_tx_timestamp);
+/**
+ * sock_sendmsg - send a message through @sock
+ * @sock: socket
+ * @msg: message to send
+ *
+ * Sends @msg through @sock, passing through LSM.
+ * Returns the number of bytes sent, or an error code.
+ */
+
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
@@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg)
}
EXPORT_SYMBOL(sock_sendmsg);
+/**
+ * kernel_sendmsg - send a message through @sock (kernel-space)
+ * @sock: socket
+ * @msg: message header
+ * @vec: kernel vec
+ * @num: vec array length
+ * @size: total message data size
+ *
+ * Builds the message data with @vec and sends it through @sock.
+ * Returns the number of bytes sent, or an error code.
+ */
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_sendmsg);
+/**
+ * kernel_sendmsg_locked - send a message through @sock (kernel-space)
+ * @sk: sock
+ * @msg: message header
+ * @vec: output s/g array
+ * @num: output s/g array length
+ * @size: total message data size
+ *
+ * Builds the message data with @vec and sends it through @sock.
+ * Returns the number of bytes sent, or an error code.
+ * Caller must hold @sk.
+ */
+
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+/**
+ * sock_recvmsg - receive a message from @sock
+ * @sock: socket
+ * @msg: message to receive
+ * @flags: message flags
+ *
+ * Receives @msg from @sock, passing through LSM. Returns the total number
+ * of bytes received, or an error.
+ */
+
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
@@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
EXPORT_SYMBOL(sock_recvmsg);
/**
- * kernel_recvmsg - Receive a message from a socket (kernel space)
- * @sock: The socket to receive the message from
- * @msg: Received message
- * @vec: Input s/g array for message data
- * @num: Size of input s/g array
- * @size: Number of bytes to read
- * @flags: Message flags (MSG_DONTWAIT, etc...)
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock: The socket to receive the message from
+ * @msg: Received message
+ * @vec: Input s/g array for message data
+ * @num: Size of input s/g array
+ * @size: Number of bytes to read
+ * @flags: Message flags (MSG_DONTWAIT, etc...)
*
- * On return the msg structure contains the scatter/gather array passed in the
- * vec argument. The array is modified so that it consists of the unfilled
- * portion of the original array.
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
*
- * The returned value is the total number of bytes received, or an error.
+ * The returned value is the total number of bytes received, or an error.
*/
+
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
@@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+
struct ns_common *get_net_ns(struct ns_common *ns)
{
return &get_net(container_of(ns, struct net, ns))->ns;
@@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return err;
}
+/**
+ * sock_create_lite - creates a socket
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * Creates a new socket and assigns it to @res, passing through LSM.
+ * The new socket initialization is not complete, see kernel_accept().
+ * Returns 0 or an error. On failure @res is set to %NULL.
+ * This function internally uses GFP_KERNEL.
+ */
+
int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
int err;
@@ -1224,6 +1309,21 @@ call_kill:
}
EXPORT_SYMBOL(sock_wake_async);
+/**
+ * __sock_create - creates a socket
+ * @net: net namespace
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ * @kern: boolean for kernel space sockets
+ *
+ * Creates a new socket and assigns it to @res, passing through LSM.
+ * Returns 0 or an error. On failure @res is set to %NULL. @kern must
+ * be set to true if the socket resides in kernel space.
+ * This function internally uses GFP_KERNEL.
+ */
+
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
@@ -1333,12 +1433,35 @@ out_release:
}
EXPORT_SYMBOL(__sock_create);
+/**
+ * sock_create - creates a socket
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * A wrapper around __sock_create().
+ * Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);
+/**
+ * sock_create_kern - creates a socket (kernel space)
+ * @net: net namespace
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * A wrapper around __sock_create().
+ * Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
return __sock_create(net, family, type, protocol, res, 1);
@@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
}
#endif
+/**
+ * kernel_bind - bind an address to a socket (kernel space)
+ * @sock: socket
+ * @addr: address
+ * @addrlen: length of address
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
return sock->ops->bind(sock, addr, addrlen);
}
EXPORT_SYMBOL(kernel_bind);
+/**
+ * kernel_listen - move socket to listening state (kernel space)
+ * @sock: socket
+ * @backlog: pending connections queue size
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_listen(struct socket *sock, int backlog)
{
return sock->ops->listen(sock, backlog);
}
EXPORT_SYMBOL(kernel_listen);
+/**
+ * kernel_accept - accept a connection (kernel space)
+ * @sock: listening socket
+ * @newsock: new connected socket
+ * @flags: flags
+ *
+ * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
+ * If it fails, @newsock is guaranteed to be %NULL.
+ * Returns 0 or an error.
+ */
+
int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -3359,6 +3510,19 @@ done:
}
EXPORT_SYMBOL(kernel_accept);
+/**
+ * kernel_connect - connect a socket (kernel space)
+ * @sock: socket
+ * @addr: address
+ * @addrlen: address length
+ * @flags: flags (O_NONBLOCK, ...)
+ *
+ * For datagram sockets, @addr is the addres to which datagrams are sent
+ * by default, and the only address from which datagrams are received.
+ * For stream sockets, attempts to connect to @addr.
+ * Returns 0 or an error code.
+ */
+
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
@@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
}
EXPORT_SYMBOL(kernel_connect);
+/**
+ * kernel_getsockname - get the address which the socket is bound (kernel space)
+ * @sock: socket
+ * @addr: address holder
+ *
+ * Fills the @addr pointer with the address which the socket is bound.
+ * Returns 0 or an error code.
+ */
+
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
+/**
+ * kernel_peername - get the address which the socket is connected (kernel space)
+ * @sock: socket
+ * @addr: address holder
+ *
+ * Fills the @addr pointer with the address which the socket is connected.
+ * Returns 0 or an error code.
+ */
+
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
+/**
+ * kernel_getsockopt - get a socket option (kernel space)
+ * @sock: socket
+ * @level: API level (SOL_SOCKET, ...)
+ * @optname: option tag
+ * @optval: option value
+ * @optlen: option length
+ *
+ * Assigns the option length to @optlen.
+ * Returns 0 or an error.
+ */
+
int kernel_getsockopt(struct socket *sock, int level, int optname,
char *optval, int *optlen)
{
@@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(kernel_getsockopt);
+/**
+ * kernel_setsockopt - set a socket option (kernel space)
+ * @sock: socket
+ * @level: API level (SOL_SOCKET, ...)
+ * @optname: option tag
+ * @optval: option value
+ * @optlen: option length
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_setsockopt(struct socket *sock, int level, int optname,
char *optval, unsigned int optlen)
{
@@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(kernel_setsockopt);
+/**
+ * kernel_sendpage - send a &page through a socket (kernel space)
+ * @sock: socket
+ * @page: page
+ * @offset: page offset
+ * @size: total size in bytes
+ * @flags: flags (MSG_DONTWAIT, ...)
+ *
+ * Returns the total amount sent in bytes or an error.
+ */
+
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
@@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage);
+/**
+ * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
+ * @sk: sock
+ * @page: page
+ * @offset: page offset
+ * @size: total size in bytes
+ * @flags: flags (MSG_DONTWAIT, ...)
+ *
+ * Returns the total amount sent in bytes or an error.
+ * Caller must hold @sk.
+ */
+
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
@@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
+/**
+ * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
+ * @sock: socket
+ * @how: connection part
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
-/* This routine returns the IP overhead imposed by a socket i.e.
- * the length of the underlying IP header, depending on whether
- * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket. Assumes that the caller has a lock on the socket.
+/**
+ * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
+ * @sk: socket
+ *
+ * This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket. Assumes that the caller has a lock on the socket.
*/
+
u32 kernel_sock_ip_overhead(struct sock *sk)
{
struct inet_sock *inet;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index da1a676860ca..860dcfb95ee4 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -550,6 +550,8 @@ EXPORT_SYMBOL_GPL(strp_check_rcv);
static int __init strp_mod_init(void)
{
strp_wq = create_singlethread_workqueue("kstrp");
+ if (unlikely(!strp_wq))
+ return -ENOMEM;
return 0;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 228970e6e52b..187d10443a15 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2311,6 +2311,15 @@ out_exit:
rpc_exit(task, status);
}
+static bool
+rpc_check_connected(const struct rpc_rqst *req)
+{
+ /* No allocated request or transport? return true */
+ if (!req || !req->rq_xprt)
+ return true;
+ return xprt_connected(req->rq_xprt);
+}
+
static void
rpc_check_timeout(struct rpc_task *task)
{
@@ -2322,10 +2331,11 @@ rpc_check_timeout(struct rpc_task *task)
dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
task->tk_timeouts++;
- if (RPC_IS_SOFTCONN(task)) {
+ if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
rpc_exit(task, -ETIMEDOUT);
return;
}
+
if (RPC_IS_SOFT(task)) {
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9359539907ba..732d4b57411a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -495,8 +495,8 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
int flags, struct rpc_rqst *req)
{
struct xdr_buf *buf = &req->rq_private_buf;
- size_t want, read;
- ssize_t ret;
+ size_t want, uninitialized_var(read);
+ ssize_t uninitialized_var(ret);
xs_read_header(transport, buf);
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 06fee142f09f..63f39201e41e 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -919,6 +919,9 @@ int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
{
struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP);
+ if (!group)
+ return -EMSGSIZE;
+
if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
grp->type) ||
nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f076edb74338..7ce1e86b024f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr)
void tipc_net_stop(struct net *net)
{
- u32 self = tipc_own_addr(net);
-
- if (!self)
+ if (!tipc_own_id(net))
return;
- tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
rtnl_lock();
tipc_bearer_stop(net);
tipc_node_stop(net);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4ad3586da8f0..340a6e7c43a7 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
if (msg->rep_type)
tipc_tlv_init(msg->rep, msg->rep_type);
- if (cmd->header)
- (*cmd->header)(msg);
+ if (cmd->header) {
+ err = (*cmd->header)(msg);
+ if (err) {
+ kfree_skb(msg->rep);
+ msg->rep = NULL;
+ return err;
+ }
+ }
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
@@ -397,7 +403,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ len -= offsetof(struct tipc_bearer_config, name);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(b->name, len))
return -EINVAL;
@@ -766,7 +777,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
lc = (struct tipc_link_config *)TLV_DATA(msg->req);
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ len -= offsetof(struct tipc_link_config, name);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_LINK_NAME);
if (!string_is_valid(lc->name, len))
return -EINVAL;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2dc4919ab23c..dd3b6dc17662 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -817,10 +817,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
{
struct tipc_link_entry *le = &n->links[bearer_id];
+ struct tipc_media_addr *maddr = NULL;
struct tipc_link *l = le->link;
- struct tipc_media_addr *maddr;
- struct sk_buff_head xmitq;
int old_bearer_id = bearer_id;
+ struct sk_buff_head xmitq;
if (!l)
return;
@@ -844,7 +844,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_node_write_unlock(n);
if (delete)
tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
- tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3274ef625dba..b542f14ed444 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2349,6 +2349,16 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return 0;
}
+static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr)
+{
+ if (addr->family != AF_TIPC)
+ return false;
+ if (addr->addrtype == TIPC_SERVICE_RANGE)
+ return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper);
+ return (addr->addrtype == TIPC_SERVICE_ADDR ||
+ addr->addrtype == TIPC_SOCKET_ADDR);
+}
+
/**
* tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
@@ -2384,18 +2394,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
if (!tipc_sk_type_connectionless(sk))
res = -EINVAL;
goto exit;
- } else if (dst->family != AF_TIPC) {
- res = -EINVAL;
}
- if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+ if (!tipc_sockaddr_is_sane(dst)) {
res = -EINVAL;
- if (res)
goto exit;
-
+ }
/* DGRAM/RDM connect(), just save the destaddr */
if (tipc_sk_type_connectionless(sk)) {
memcpy(&tsk->peer, dest, destlen);
goto exit;
+ } else if (dst->addrtype == TIPC_SERVICE_RANGE) {
+ res = -EINVAL;
+ goto exit;
}
previous = sk->sk_state;
@@ -3255,6 +3265,8 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
peer_port = tsk_peer_port(tsk);
nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);
+ if (!nest)
+ return -EMSGSIZE;
if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
goto msg_full;
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 4a708a4e8583..b45932d78004 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -363,6 +363,7 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
struct tipc_subscription *sub;
if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+ s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 425351ac2a9b..20b191227969 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1484,6 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
return err;
}
+ } else {
+ *zc = false;
}
rxm->full_len -= padding_length(ctx, tls_ctx, skb);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 77520eacee8f..989e52386c35 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
static void xdp_umem_release(struct xdp_umem *umem)
{
- struct task_struct *task;
- struct mm_struct *mm;
-
xdp_umem_clear_dev(umem);
ida_simple_remove(&umem_ida, umem->id);
@@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem)
xdp_umem_unpin_pages(umem);
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
-
- mmput(mm);
kfree(umem->pages);
umem->pages = NULL;
xdp_umem_unaccount_pages(umem);
-out:
kfree(umem);
}
@@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (size_chk < 0)
return -EINVAL;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
umem->address = (unsigned long)addr;
umem->chunk_mask = ~((u64)chunk_size - 1);
umem->size = size;
@@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
err = xdp_umem_account_pages(umem);
if (err)
- goto out;
+ return err;
err = xdp_umem_pin_pages(umem);
if (err)
@@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
out_account:
xdp_umem_unaccount_pages(umem);
-out:
- put_pid(umem->pid);
return err;
}
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 2554a15ecf2b..76ca30cc4791 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -199,11 +199,8 @@ sub_cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(part-of-module),1,0)" "$(@)";
recordmcount_source := $(srctree)/scripts/recordmcount.pl
endif # BUILD_C_RECORDMCOUNT
-cmd_record_mcount = \
- if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \
- "$(CC_FLAGS_FTRACE)" ]; then \
- $(sub_cmd_record_mcount) \
- fi
+cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)), \
+ $(sub_cmd_record_mcount))
endif # CC_USING_RECORD_MCOUNT
endif # CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 5b756278df13..a09333fd7cef 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5977,7 +5977,7 @@ sub process {
while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) {
$specifier = $1;
$extension = $2;
- if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) {
+ if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOxt]/) {
$bad_specifier = $specifier;
last;
}
diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci
new file mode 100644
index 000000000000..350145da7669
--- /dev/null
+++ b/scripts/coccinelle/api/stream_open.cocci
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+// Author: Kirill Smelkov (kirr@nexedi.com)
+//
+// Search for stream-like files that are using nonseekable_open and convert
+// them to stream_open. A stream-like file is a file that does not use ppos in
+// its read and write. Rationale for the conversion is to avoid deadlock in
+// between read and write.
+
+virtual report
+virtual patch
+virtual explain // explain decisions in the patch (SPFLAGS="-D explain")
+
+// stream-like reader & writer - ones that do not depend on f_pos.
+@ stream_reader @
+identifier readstream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+ ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos)
+ {
+ ... when != ppos
+ }
+
+@ stream_writer @
+identifier writestream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+ ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos)
+ {
+ ... when != ppos
+ }
+
+
+// a function that blocks
+@ blocks @
+identifier block_f;
+identifier wait_event =~ "^wait_event_.*";
+@@
+ block_f(...) {
+ ... when exists
+ wait_event(...)
+ ... when exists
+ }
+
+// stream_reader that can block inside.
+//
+// XXX wait_* can be called not directly from current function (e.g. func -> f -> g -> wait())
+// XXX currently reader_blocks supports only direct and 1-level indirect cases.
+@ reader_blocks_direct @
+identifier stream_reader.readstream;
+identifier wait_event =~ "^wait_event_.*";
+@@
+ readstream(...)
+ {
+ ... when exists
+ wait_event(...)
+ ... when exists
+ }
+
+@ reader_blocks_1 @
+identifier stream_reader.readstream;
+identifier blocks.block_f;
+@@
+ readstream(...)
+ {
+ ... when exists
+ block_f(...)
+ ... when exists
+ }
+
+@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @
+identifier stream_reader.readstream;
+@@
+ readstream(...) {
+ ...
+ }
+
+
+// file_operations + whether they have _any_ .read, .write, .llseek ... at all.
+//
+// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c)
+@ fops0 @
+identifier fops;
+@@
+ struct file_operations fops = {
+ ...
+ };
+
+@ has_read @
+identifier fops0.fops;
+identifier read_f;
+@@
+ struct file_operations fops = {
+ .read = read_f,
+ };
+
+@ has_read_iter @
+identifier fops0.fops;
+identifier read_iter_f;
+@@
+ struct file_operations fops = {
+ .read_iter = read_iter_f,
+ };
+
+@ has_write @
+identifier fops0.fops;
+identifier write_f;
+@@
+ struct file_operations fops = {
+ .write = write_f,
+ };
+
+@ has_write_iter @
+identifier fops0.fops;
+identifier write_iter_f;
+@@
+ struct file_operations fops = {
+ .write_iter = write_iter_f,
+ };
+
+@ has_llseek @
+identifier fops0.fops;
+identifier llseek_f;
+@@
+ struct file_operations fops = {
+ .llseek = llseek_f,
+ };
+
+@ has_no_llseek @
+identifier fops0.fops;
+@@
+ struct file_operations fops = {
+ .llseek = no_llseek,
+ };
+
+@ has_mmap @
+identifier fops0.fops;
+identifier mmap_f;
+@@
+ struct file_operations fops = {
+ .mmap = mmap_f,
+ };
+
+@ has_copy_file_range @
+identifier fops0.fops;
+identifier copy_file_range_f;
+@@
+ struct file_operations fops = {
+ .copy_file_range = copy_file_range_f,
+ };
+
+@ has_remap_file_range @
+identifier fops0.fops;
+identifier remap_file_range_f;
+@@
+ struct file_operations fops = {
+ .remap_file_range = remap_file_range_f,
+ };
+
+@ has_splice_read @
+identifier fops0.fops;
+identifier splice_read_f;
+@@
+ struct file_operations fops = {
+ .splice_read = splice_read_f,
+ };
+
+@ has_splice_write @
+identifier fops0.fops;
+identifier splice_write_f;
+@@
+ struct file_operations fops = {
+ .splice_write = splice_write_f,
+ };
+
+
+// file_operations that is candidate for stream_open conversion - it does not
+// use mmap and other methods that assume @offset access to file.
+//
+// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now.
+// XXX maybe_steam.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops".
+@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @
+identifier fops0.fops;
+@@
+ struct file_operations fops = {
+ };
+
+
+// ---- conversions ----
+
+// XXX .open = nonseekable_open -> .open = stream_open
+// XXX .open = func -> openfunc -> nonseekable_open
+
+// read & write
+//
+// if both are used in the same file_operations together with an opener -
+// under that conditions we can use stream_open instead of nonseekable_open.
+@ fops_rw depends on maybe_stream @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+identifier stream_writer.writestream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .read = readstream,
+ .write = writestream,
+ };
+
+@ report_rw depends on report @
+identifier fops_rw.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report && reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+ "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,))
+
+@ script:python depends on report && !reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+
+@ explain_rw_deadlocked depends on explain && reader_blocks @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read & write (was deadlock) */
+ ...>
+ }
+
+
+@ explain_rw_nodeadlock depends on explain && !reader_blocks @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read & write (no direct deadlock) */
+ ...>
+ }
+
+@ patch_rw depends on patch @
+identifier fops_rw.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// read, but not write
+@ fops_r depends on maybe_stream && !has_write @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .read = readstream,
+ };
+
+@ report_r depends on report @
+identifier fops_r.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_r.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_r depends on explain @
+identifier fops_r.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* read only */
+ ...>
+ }
+
+@ patch_r depends on patch @
+identifier fops_r.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// write, but not read
+@ fops_w depends on maybe_stream && !has_read @
+identifier fops0.fops, openfunc;
+identifier stream_writer.writestream;
+@@
+ struct file_operations fops = {
+ .open = openfunc,
+ .write = writestream,
+ };
+
+@ report_w depends on report @
+identifier fops_w.openfunc;
+position p1;
+@@
+ openfunc(...) {
+ <...
+ nonseekable_open@p1
+ ...>
+ }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_w.p1;
+@@
+coccilib.report.print_report(p[0],
+ "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_w depends on explain @
+identifier fops_w.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ nonseekable_open /* write only */
+ ...>
+ }
+
+@ patch_w depends on patch @
+identifier fops_w.openfunc;
+@@
+ openfunc(...) {
+ <...
+- nonseekable_open
++ stream_open
+ ...>
+ }
+
+
+// no read, no write - don't change anything
diff --git a/scripts/coccinelle/free/put_device.cocci b/scripts/coccinelle/free/put_device.cocci
index 7395697e7f19..c9f071b0a0ab 100644
--- a/scripts/coccinelle/free/put_device.cocci
+++ b/scripts/coccinelle/free/put_device.cocci
@@ -32,6 +32,7 @@ if (id == NULL || ...) { ... return ...; }
( id
| (T2)dev_get_drvdata(&id->dev)
| (T3)platform_get_drvdata(id)
+| &id->dev
);
| return@p2 ...;
)
diff --git a/scripts/coccinelle/misc/badty.cocci b/scripts/coccinelle/misc/badty.cocci
index 481cf301ccfc..08470362199c 100644
--- a/scripts/coccinelle/misc/badty.cocci
+++ b/scripts/coccinelle/misc/badty.cocci
@@ -1,4 +1,4 @@
-/// Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element
+/// Correct the size argument to alloc functions
///
//# This makes an effort to find cases where the argument to sizeof is wrong
//# in memory allocation functions by checking the type of the allocated memory
diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c
index 611945611bf8..1dcfb288ee63 100644
--- a/scripts/kconfig/lxdialog/inputbox.c
+++ b/scripts/kconfig/lxdialog/inputbox.c
@@ -113,7 +113,8 @@ do_resize:
case KEY_DOWN:
break;
case KEY_BACKSPACE:
- case 127:
+ case 8: /* ^H */
+ case 127: /* ^? */
if (pos) {
wattrset(dialog, dlg.inputbox.atr);
if (input_x == 0) {
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index a4670f4e825a..ac92c0ded6c5 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -1048,7 +1048,7 @@ static int do_match(int key, struct match_state *state, int *ans)
state->match_direction = FIND_NEXT_MATCH_UP;
*ans = get_mext_match(state->pattern,
state->match_direction);
- } else if (key == KEY_BACKSPACE || key == 127) {
+ } else if (key == KEY_BACKSPACE || key == 8 || key == 127) {
state->pattern[strlen(state->pattern)-1] = '\0';
adj_match_dir(&state->match_direction);
} else
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c
index 7be620a1fcdb..77f525a8617c 100644
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -439,7 +439,8 @@ int dialog_inputbox(WINDOW *main_window,
case KEY_F(F_EXIT):
case KEY_F(F_BACK):
break;
- case 127:
+ case 8: /* ^H */
+ case 127: /* ^? */
case KEY_BACKSPACE:
if (cursor_position > 0) {
memmove(&result[cursor_position-1],
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 0b0d1080b1c5..f277e116e0eb 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -639,7 +639,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
info->sechdrs[sym->st_shndx].sh_offset -
(info->hdr->e_type != ET_REL ?
info->sechdrs[sym->st_shndx].sh_addr : 0);
- crc = *crcp;
+ crc = TO_NATIVE(*crcp);
}
sym_update_crc(symname + strlen("__crc_"), mod, crc,
export);
diff --git a/security/Kconfig b/security/Kconfig
index 1d6463fb1450..353cfef71d4e 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -239,8 +239,46 @@ source "security/safesetid/Kconfig"
source "security/integrity/Kconfig"
+choice
+ prompt "First legacy 'major LSM' to be initialized"
+ default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX
+ default DEFAULT_SECURITY_SMACK if SECURITY_SMACK
+ default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO
+ default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR
+ default DEFAULT_SECURITY_DAC
+
+ help
+ This choice is there only for converting CONFIG_DEFAULT_SECURITY
+ in old kernel configs to CONFIG_LSM in new kernel configs. Don't
+ change this choice unless you are creating a fresh kernel config,
+ for this choice will be ignored after CONFIG_LSM has been set.
+
+ Selects the legacy "major security module" that will be
+ initialized first. Overridden by non-default CONFIG_LSM.
+
+ config DEFAULT_SECURITY_SELINUX
+ bool "SELinux" if SECURITY_SELINUX=y
+
+ config DEFAULT_SECURITY_SMACK
+ bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y
+
+ config DEFAULT_SECURITY_TOMOYO
+ bool "TOMOYO" if SECURITY_TOMOYO=y
+
+ config DEFAULT_SECURITY_APPARMOR
+ bool "AppArmor" if SECURITY_APPARMOR=y
+
+ config DEFAULT_SECURITY_DAC
+ bool "Unix Discretionary Access Controls"
+
+endchoice
+
config LSM
string "Ordered list of enabled LSMs"
+ default "yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
+ default "yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
+ default "yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
+ default "yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
default "yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
help
A comma-separated list of LSMs, in initialization order.
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index bcc9c6ead7fd..efdbf17f3915 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -125,7 +125,7 @@ out:
*/
int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
- unsigned char *h2, unsigned char h3, ...)
+ unsigned char *h2, unsigned int h3, ...)
{
unsigned char paramdigest[SHA1_DIGEST_SIZE];
struct sdesc *sdesc;
@@ -135,13 +135,16 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key,
int ret;
va_list argp;
+ if (!chip)
+ return -ENODEV;
+
sdesc = init_sdesc(hashalg);
if (IS_ERR(sdesc)) {
pr_info("trusted_key: can't alloc %s\n", hash_alg);
return PTR_ERR(sdesc);
}
- c = h3;
+ c = !!h3;
ret = crypto_shash_init(&sdesc->shash);
if (ret < 0)
goto out;
@@ -196,6 +199,9 @@ int TSS_checkhmac1(unsigned char *buffer,
va_list argp;
int ret;
+ if (!chip)
+ return -ENODEV;
+
bufsize = LOAD32(buffer, TPM_SIZE_OFFSET);
tag = LOAD16(buffer, 0);
ordinal = command;
@@ -363,6 +369,9 @@ int trusted_tpm_send(unsigned char *cmd, size_t buflen)
{
int rc;
+ if (!chip)
+ return -ENODEV;
+
dump_tpm_buf(cmd);
rc = tpm_send(chip, cmd, buflen);
dump_tpm_buf(cmd);
@@ -429,6 +438,9 @@ int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
{
int ret;
+ if (!chip)
+ return -ENODEV;
+
INIT_BUF(tb);
store16(tb, TPM_TAG_RQU_COMMAND);
store32(tb, TPM_OIAP_SIZE);
@@ -1245,9 +1257,13 @@ static int __init init_trusted(void)
{
int ret;
+ /* encrypted_keys.ko depends on successful load of this module even if
+ * TPM is not used.
+ */
chip = tpm_default_chip();
if (!chip)
- return -ENOENT;
+ return 0;
+
ret = init_digests();
if (ret < 0)
goto err_put;
@@ -1269,10 +1285,12 @@ err_put:
static void __exit cleanup_trusted(void)
{
- put_device(&chip->dev);
- kfree(digests);
- trusted_shash_release();
- unregister_key_type(&key_type_trusted);
+ if (chip) {
+ put_device(&chip->dev);
+ kfree(digests);
+ trusted_shash_release();
+ unregister_key_type(&key_type_trusted);
+ }
}
late_initcall(init_trusted);
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 57cc60722dd3..efac68556b45 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -206,7 +206,7 @@ static void yama_ptracer_del(struct task_struct *tracer,
* yama_task_free - check for task_pid to remove from exception list
* @task: task being removed
*/
-void yama_task_free(struct task_struct *task)
+static void yama_task_free(struct task_struct *task)
{
yama_ptracer_del(task, task);
}
@@ -222,7 +222,7 @@ void yama_task_free(struct task_struct *task)
* Return 0 on success, -ve on error. -ENOSYS is returned when Yama
* does not handle the given option.
*/
-int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3,
+static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5)
{
int rc = -ENOSYS;
@@ -401,7 +401,7 @@ static int yama_ptrace_access_check(struct task_struct *child,
*
* Returns 0 if following the ptrace is allowed, -ve on error.
*/
-int yama_ptrace_traceme(struct task_struct *parent)
+static int yama_ptrace_traceme(struct task_struct *parent)
{
int rc = 0;
@@ -452,7 +452,7 @@ static int yama_dointvec_minmax(struct ctl_table *table, int write,
static int zero;
static int max_scope = YAMA_SCOPE_NO_ATTACH;
-struct ctl_path yama_sysctl_path[] = {
+static struct ctl_path yama_sysctl_path[] = {
{ .procname = "kernel", },
{ .procname = "yama", },
{ }
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index d5b0d7ba83c4..f6ae68017608 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -940,6 +940,28 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream)
oss_frame_size = snd_pcm_format_physical_width(params_format(params)) *
params_channels(params) / 8;
+ err = snd_pcm_oss_period_size(substream, params, sparams);
+ if (err < 0)
+ goto failure;
+
+ n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size);
+ err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL);
+ if (err < 0)
+ goto failure;
+
+ err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS,
+ runtime->oss.periods, NULL);
+ if (err < 0)
+ goto failure;
+
+ snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL);
+
+ err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams);
+ if (err < 0) {
+ pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err);
+ goto failure;
+ }
+
#ifdef CONFIG_SND_PCM_OSS_PLUGINS
snd_pcm_oss_plugin_clear(substream);
if (!direct) {
@@ -974,27 +996,6 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream)
}
#endif
- err = snd_pcm_oss_period_size(substream, params, sparams);
- if (err < 0)
- goto failure;
-
- n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size);
- err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL);
- if (err < 0)
- goto failure;
-
- err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS,
- runtime->oss.periods, NULL);
- if (err < 0)
- goto failure;
-
- snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL);
-
- if ((err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams)) < 0) {
- pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err);
- goto failure;
- }
-
if (runtime->oss.trigger) {
sw_params->start_threshold = 1;
} else {
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index f731f904e8cc..1d8452912b14 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1445,8 +1445,15 @@ static int snd_pcm_pause(struct snd_pcm_substream *substream, int push)
static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state)
{
struct snd_pcm_runtime *runtime = substream->runtime;
- if (runtime->status->state == SNDRV_PCM_STATE_SUSPENDED)
+ switch (runtime->status->state) {
+ case SNDRV_PCM_STATE_SUSPENDED:
return -EBUSY;
+ /* unresumable PCM state; return -EBUSY for skipping suspend */
+ case SNDRV_PCM_STATE_OPEN:
+ case SNDRV_PCM_STATE_SETUP:
+ case SNDRV_PCM_STATE_DISCONNECTED:
+ return -EBUSY;
+ }
runtime->trigger_master = substream;
return 0;
}
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index ee601d7f0926..c0690d1ecd55 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mm.h>
+#include <linux/nospec.h>
#include <sound/rawmidi.h>
#include <sound/info.h>
#include <sound/control.h>
@@ -601,6 +602,7 @@ static int __snd_rawmidi_info_select(struct snd_card *card,
return -ENXIO;
if (info->stream < 0 || info->stream > 1)
return -EINVAL;
+ info->stream = array_index_nospec(info->stream, 2);
pstr = &rmidi->streams[info->stream];
if (pstr->substream_count == 0)
return -ENOENT;
diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c
index 278ebb993122..c93945917235 100644
--- a/sound/core/seq/oss/seq_oss_synth.c
+++ b/sound/core/seq/oss/seq_oss_synth.c
@@ -617,13 +617,14 @@ int
snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_info *inf)
{
struct seq_oss_synth *rec;
+ struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev);
- if (dev < 0 || dev >= dp->max_synthdev)
+ if (!info)
return -ENXIO;
- if (dp->synths[dev].is_midi) {
+ if (info->is_midi) {
struct midi_info minf;
- snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf);
+ snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf);
inf->synth_type = SYNTH_TYPE_MIDI;
inf->synth_subtype = 0;
inf->nr_voices = 16;
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 29882bda7632..e1ebc6d5f382 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -1005,7 +1005,6 @@ struct ca0132_spec {
unsigned int scp_resp_header;
unsigned int scp_resp_data[4];
unsigned int scp_resp_count;
- bool alt_firmware_present;
bool startup_check_entered;
bool dsp_reload;
@@ -7518,7 +7517,7 @@ static bool ca0132_download_dsp_images(struct hda_codec *codec)
bool dsp_loaded = false;
struct ca0132_spec *spec = codec->spec;
const struct dsp_image_seg *dsp_os_image;
- const struct firmware *fw_entry;
+ const struct firmware *fw_entry = NULL;
/*
* Alternate firmwares for different variants. The Recon3Di apparently
* can use the default firmware, but I'll leave the option in case
@@ -7529,33 +7528,26 @@ static bool ca0132_download_dsp_images(struct hda_codec *codec)
case QUIRK_R3D:
case QUIRK_AE5:
if (request_firmware(&fw_entry, DESKTOP_EFX_FILE,
- codec->card->dev) != 0) {
+ codec->card->dev) != 0)
codec_dbg(codec, "Desktop firmware not found.");
- spec->alt_firmware_present = false;
- } else {
+ else
codec_dbg(codec, "Desktop firmware selected.");
- spec->alt_firmware_present = true;
- }
break;
case QUIRK_R3DI:
if (request_firmware(&fw_entry, R3DI_EFX_FILE,
- codec->card->dev) != 0) {
+ codec->card->dev) != 0)
codec_dbg(codec, "Recon3Di alt firmware not detected.");
- spec->alt_firmware_present = false;
- } else {
+ else
codec_dbg(codec, "Recon3Di firmware selected.");
- spec->alt_firmware_present = true;
- }
break;
default:
- spec->alt_firmware_present = false;
break;
}
/*
* Use default ctefx.bin if no alt firmware is detected, or if none
* exists for your particular codec.
*/
- if (!spec->alt_firmware_present) {
+ if (!fw_entry) {
codec_dbg(codec, "Default firmware selected.");
if (request_firmware(&fw_entry, EFX_FILE,
codec->card->dev) != 0)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 191830d4fa40..a3fb3d4c5730 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5688,6 +5688,8 @@ enum {
ALC225_FIXUP_WYSE_AUTO_MUTE,
ALC225_FIXUP_WYSE_DISABLE_MIC_VREF,
ALC286_FIXUP_ACER_AIO_HEADSET_MIC,
+ ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
+ ALC299_FIXUP_PREDATOR_SPK,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -6696,6 +6698,22 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE
},
+ [ALC256_FIXUP_ASUS_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x04a11120 }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+ },
+ [ALC299_FIXUP_PREDATOR_SPK] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x21, 0x90170150 }, /* use as headset mic, without its own jack detect */
+ { }
+ }
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6712,9 +6730,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK),
SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
@@ -7111,6 +7133,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"},
{.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"},
{.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"},
+ {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"},
{}
};
#define ALC225_STANDARD_PINS \
@@ -7331,6 +7354,18 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x14, 0x90170110},
{0x1b, 0x90a70130},
{0x21, 0x03211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x21, 0x03211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
+ {0x1a, 0x90a70130},
+ {0x1b, 0x90170110},
+ {0x21, 0x03211020}),
SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
{0x12, 0xb7a60130},
{0x13, 0xb8a61140},
diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h
index c317d3e6867a..ea6a255ae61f 100644
--- a/tools/arch/alpha/include/uapi/asm/mman.h
+++ b/tools/arch/alpha/include/uapi/asm/mman.h
@@ -27,8 +27,6 @@
#define MAP_NONBLOCK 0x40000
#define MAP_NORESERVE 0x10000
#define MAP_POPULATE 0x20000
-#define MAP_PRIVATE 0x02
-#define MAP_SHARED 0x01
#define MAP_STACK 0x80000
#define PROT_EXEC 0x4
#define PROT_GROWSDOWN 0x01000000
diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h
index de2206883abc..c8acaa138d46 100644
--- a/tools/arch/mips/include/uapi/asm/mman.h
+++ b/tools/arch/mips/include/uapi/asm/mman.h
@@ -28,8 +28,6 @@
#define MAP_NONBLOCK 0x20000
#define MAP_NORESERVE 0x0400
#define MAP_POPULATE 0x10000
-#define MAP_PRIVATE 0x002
-#define MAP_SHARED 0x001
#define MAP_STACK 0x40000
#define PROT_EXEC 0x04
#define PROT_GROWSDOWN 0x01000000
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
index 1bd78758bde9..f9fd1325f5bd 100644
--- a/tools/arch/parisc/include/uapi/asm/mman.h
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -27,8 +27,6 @@
#define MAP_NONBLOCK 0x20000
#define MAP_NORESERVE 0x4000
#define MAP_POPULATE 0x10000
-#define MAP_PRIVATE 0x02
-#define MAP_SHARED 0x01
#define MAP_STACK 0x40000
#define PROT_EXEC 0x4
#define PROT_GROWSDOWN 0x01000000
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 8c876c166ef2..26ca425f4c2c 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char {
#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58)
#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57)
#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56)
+#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54)
#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63)
#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62)
#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61)
+#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58)
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 16511d97e8dc..09652eabe769 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -152,7 +152,8 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 pcc[16]; /* with MSA4 */
__u8 ppno[16]; /* with MSA5 */
__u8 kma[16]; /* with MSA8 */
- __u8 reserved[1808];
+ __u8 kdsa[16]; /* with MSA9 */
+ __u8 reserved[1792];
};
/* kvm attributes for crypto */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 6d6122524711..981ff9479648 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h
index 34dde6f44dae..f2b08c990afc 100644
--- a/tools/arch/xtensa/include/uapi/asm/mman.h
+++ b/tools/arch/xtensa/include/uapi/asm/mman.h
@@ -27,8 +27,6 @@
#define MAP_NONBLOCK 0x20000
#define MAP_NORESERVE 0x0400
#define MAP_POPULATE 0x10000
-#define MAP_PRIVATE 0x002
-#define MAP_SHARED 0x001
#define MAP_STACK 0x40000
#define PROT_EXEC 0x4
#define PROT_GROWSDOWN 0x01000000
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index d68eb4fb40cc..2b0e02c38870 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
/*
* Check OpenCSD library version is sufficient to provide required features
*/
-#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 0.10.0 is required"
+#error "OpenCSD >= 0.11.0 is required"
#endif
int main(void)
diff --git a/tools/include/uapi/asm-generic/mman-common-tools.h b/tools/include/uapi/asm-generic/mman-common-tools.h
new file mode 100644
index 000000000000..af7d0d3a3182
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman-common-tools.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H
+#define __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H
+
+#include <asm-generic/mman-common.h>
+
+/* We need this because we need to have tools/include/uapi/ included in the tools
+ * header search path to get access to stuff that is not yet in the system's
+ * copy of the files in that directory, but since this cset:
+ *
+ * 746c9398f5ac ("arch: move common mmap flags to linux/mman.h")
+ *
+ * We end up making sys/mman.h, that is in the system headers, to not find the
+ * MAP_SHARED and MAP_PRIVATE defines because they are not anymore in our copy
+ * of asm-generic/mman-common.h. So we define them here and include this header
+ * from each of the per arch mman.h headers.
+ */
+#ifndef MAP_SHARED
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#endif
+#endif // __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index e7ee32861d51..abd238d0f7a4 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -15,9 +15,7 @@
#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
-#define MAP_SHARED 0x01 /* Share changes */
-#define MAP_PRIVATE 0x02 /* Changes are private */
-#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
#define MAP_TYPE 0x0f /* Mask for type of mapping */
#define MAP_FIXED 0x10 /* Interpret addr exactly */
#define MAP_ANONYMOUS 0x20 /* don't use a file */
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index 653687d9771b..36c197fc44a0 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GENERIC_MMAN_H
#define __ASM_GENERIC_MMAN_H
-#include <asm-generic/mman-common.h>
+#include <asm-generic/mman-common-tools.h>
#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 12cdf611d217..dee7292e1df6 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -824,8 +824,17 @@ __SYSCALL(__NR_futex_time64, sys_futex)
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
#endif
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
+
#undef __NR_syscalls
-#define __NR_syscalls 424
+#define __NR_syscalls 428
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 298b2e197744..397810fa2d33 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+ /*
+ * When using the following param, value should be a pointer to
+ * drm_i915_gem_context_param_sseu.
+ */
+#define I915_CONTEXT_PARAM_SSEU 0x7
__u64 value;
};
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+ /*
+ * Engine class & instance to be configured or queried.
+ */
+ __u16 engine_class;
+ __u16 engine_instance;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * Mask of slices to enable for the context. Valid values are a subset
+ * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+ */
+ __u64 slice_mask;
+
+ /*
+ * Mask of subslices to enable for the context. Valid values are a
+ * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+ */
+ __u64 subslice_mask;
+
+ /*
+ * Minimum/Maximum number of EUs to enable per subslice for the
+ * context. min_eus_per_subslice must be inferior or equal to
+ * max_eus_per_subslice.
+ */
+ __u16 min_eus_per_subslice;
+ __u16 max_eus_per_subslice;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 rsvd;
+};
+
enum drm_i915_oa_format {
I915_OA_FORMAT_A13 = 1, /* HSW only */
I915_OA_FORMAT_A29, /* HSW only */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3c38ac9a92a7..929c8e537a14 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
- * Description
- * Push an element *value* in *map*. *flags* is one of:
- *
- * **BPF_EXIST**
- * If the queue/stack is full, the oldest element is removed to
- * make room for this.
- * Return
- * 0 on success, or a negative error in case of failure.
- *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
* u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
- * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * *skb*, but gets socket from **struct bpf_sock_addr** context.
* Return
* A 8-byte long non-decreasing number.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
- * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
* A 8-byte long non-decreasing number.
*
@@ -2098,52 +2088,52 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * int bpf_rc_repeat(void *ctx)
* Description
* This helper is used in programs implementing IR decoding, to
- * report a successfully decoded key press with *scancode*,
- * *toggle* value in the given *protocol*. The scancode will be
- * translated to a keycode using the rc keymap, and reported as
- * an input key down event. After a period a key up event is
- * generated. This period can be extended by calling either
- * **bpf_rc_keydown**\ () again with the same values, or calling
- * **bpf_rc_repeat**\ ().
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
*
- * Some protocols include a toggle bit, in case the button was
- * released and pressed again between consecutive scancodes.
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
*
* The *ctx* should point to the lirc sample as passed into
* the program.
*
- * The *protocol* is the decoded protocol number (see
- * **enum rc_proto** for some predefined values).
- *
* This helper is only available is the kernel was compiled with
* the **CONFIG_BPF_LIRC_MODE2** configuration option set to
* "**y**".
* Return
* 0
*
- * int bpf_rc_repeat(void *ctx)
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
* Description
* This helper is used in programs implementing IR decoding, to
- * report a successfully decoded repeat key message. This delays
- * the generation of a key up event for previously generated
- * key down event.
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
*
- * Some IR protocols like NEC have a special IR message for
- * repeating last button, for when a button is held down.
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
*
* The *ctx* should point to the lirc sample as passed into
* the program.
*
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
* This helper is only available is the kernel was compiled with
* the **CONFIG_BPF_LIRC_MODE2** configuration option set to
* "**y**".
* Return
* 0
*
- * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
* Description
* Return the cgroup v2 id of the socket associated with the *skb*.
* This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
- * Description
- * Return id of cgroup v2 that is ancestor of cgroup associated
- * with the *skb* at the *ancestor_level*. The root cgroup is at
- * *ancestor_level* zero and each step down the hierarchy
- * increments the level. If *ancestor_level* == level of cgroup
- * associated with *skb*, then return value will be same as that
- * of **bpf_skb_cgroup_id**\ ().
- *
- * The helper is useful to implement policies based on cgroups
- * that are upper in hierarchy than immediate cgroup associated
- * with *skb*.
- *
- * The format of returned id and helper limitations are same as in
- * **bpf_skb_cgroup_id**\ ().
- * Return
- * The id is returned or 0 in case the id could not be retrieved.
- *
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*
- * void* get_local_storage(void *map, u64 flags)
+ * void *bpf_get_local_storage(void *map, u64 flags)
* Description
* Get the pointer to the local storage area.
* The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is
+ * removed to make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
@@ -2343,29 +2343,94 @@ union bpf_attr {
* Return
* 0
*
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * Description
+ * Acquire a spinlock represented by the pointer *lock*, which is
+ * stored as part of a value of a map. Taking the lock allows to
+ * safely update the rest of the fields in that value. The
+ * spinlock can (and must) later be released with a call to
+ * **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ * Spinlocks in BPF programs come with a number of restrictions
+ * and constraints:
+ *
+ * * **bpf_spin_lock** objects are only allowed inside maps of
+ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ * list could be extended in the future).
+ * * BTF description of the map is mandatory.
+ * * The BPF program can take ONE lock at a time, since taking two
+ * or more could cause dead locks.
+ * * Only one **struct bpf_spin_lock** is allowed per map element.
+ * * When the lock is taken, calls (either BPF to BPF or helpers)
+ * are not allowed.
+ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ * allowed inside a spinlock-ed region.
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ * the lock, on all execution paths, before it returns.
+ * * The BPF program can access **struct bpf_spin_lock** only via
+ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ * helpers. Loading or storing data into the **struct
+ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ * * To use the **bpf_spin_lock**\ () helper, the BTF description
+ * of the map value must be a struct and have **struct
+ * bpf_spin_lock** *anyname*\ **;** field at the top level.
+ * Nested lock inside another struct is not allowed.
+ * * The **struct bpf_spin_lock** *lock* field in a map value must
+ * be aligned on a multiple of 4 bytes in that value.
+ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ * the **bpf_spin_lock** field to user space.
+ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ * a BPF program, do not update the **bpf_spin_lock** field.
+ * * **bpf_spin_lock** cannot be on the stack or inside a
+ * networking packet (it can only be inside of a map values).
+ * * **bpf_spin_lock** is available to root only.
+ * * Tracing programs and socket filter programs cannot use
+ * **bpf_spin_lock**\ () due to insufficient preemption checks
+ * (but this may change in the future).
+ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ * Return
+ * 0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * Description
+ * Release the *lock* previously locked by a call to
+ * **bpf_spin_lock**\ (\ *lock*\ ).
+ * Return
+ * 0
+ *
* struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_sock** pointer such
- * that all the fields in bpf_sock can be accessed.
+ * that all the fields in this **bpf_sock** can be accessed.
* Return
- * A **struct bpf_sock** pointer on success, or NULL in
+ * A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure.
*
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
* Description
* This helper gets a **struct bpf_tcp_sock** pointer from a
* **struct bpf_sock** pointer.
- *
* Return
- * A **struct bpf_tcp_sock** pointer on success, or NULL in
+ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
* case of failure.
*
* int bpf_skb_ecn_set_ce(struct sk_buf *skb)
- * Description
- * Sets ECN of IP header to ce (congestion encountered) if
- * current value is ect (ECN capable). Works with IPv6 and IPv4.
- * Return
- * 1 if set, 0 if not set.
+ * Description
+ * Set ECN (Explicit Congestion Notification) field of IP header
+ * to **CE** (Congestion Encountered) if current value is **ECT**
+ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ * and IPv4.
+ * Return
+ * 1 if the **CE** flag is set (either by the current helper call
+ * or because it was already present), 0 if it is not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ * Description
+ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ * **bpf_sk_release**\ () is unnecessary and not allowed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2465,7 +2530,8 @@ union bpf_attr {
FN(spin_unlock), \
FN(sk_fullsock), \
FN(tcp_sock), \
- FN(skb_ecn_set_ce),
+ FN(skb_ecn_set_ce), \
+ FN(get_listener_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 6448cdd9a350..a2f8658f1c55 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -41,6 +41,7 @@
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
/* (1U << 31) is reserved for signed error codes */
/*
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index d0f515d53299..fc1a64c3447b 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -12,6 +12,10 @@
#define OVERCOMMIT_ALWAYS 1
#define OVERCOMMIT_NEVER 2
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
* size other than the default is desired. See hugetlb_encode.h.
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 61aaacf0cfa1..8e7c56e9590f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
BPF_VERSION = 0
BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 1
+BPF_EXTRAVERSION = 2
MAKEFLAGS += --no-print-directory
@@ -79,8 +79,6 @@ export prefix libdir src obj
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-LIB_FILE = libbpf.a libbpf.so
-
VERSION = $(BPF_VERSION)
PATCHLEVEL = $(BPF_PATCHLEVEL)
EXTRAVERSION = $(BPF_EXTRAVERSION)
@@ -88,7 +86,10 @@ EXTRAVERSION = $(BPF_EXTRAVERSION)
OBJ = $@
N =
-LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
+LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
+
+LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION)
+LIB_FILE = libbpf.a libbpf.so*
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
@@ -128,16 +129,18 @@ all:
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
-BPF_IN := $(OUTPUT)libbpf-in.o
-LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
-VERSION_SCRIPT := libbpf.map
+BPF_IN := $(OUTPUT)libbpf-in.o
+VERSION_SCRIPT := libbpf.map
+
+LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
+LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
-CMD_TARGETS = $(LIB_FILE)
+CMD_TARGETS = $(LIB_TARGET)
CXX_TEST_TARGET = $(OUTPUT)test_libbpf
@@ -170,9 +173,13 @@ $(BPF_IN): force elfdep bpfdep
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf
-$(OUTPUT)libbpf.so: $(BPF_IN)
- $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \
- $^ -o $@
+$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
+
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
+ $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \
+ -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
+ @ln -sf $(@F) $(OUTPUT)libbpf.so
+ @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
@@ -192,6 +199,12 @@ check_abi: $(OUTPUT)libbpf.so
exit 1; \
fi
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
@@ -200,14 +213,16 @@ define do_install
endef
install_lib: all_cmd
- $(call QUIET_INSTALL, $(LIB_FILE)) \
- $(call do_install,$(LIB_FILE),$(libdir_SQ))
+ $(call QUIET_INSTALL, $(LIB_TARGET)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
install_headers:
$(call QUIET_INSTALL, headers) \
$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf.h,$(prefix)/include/bpf,644);
- $(call do_install,btf.h,$(prefix)/include/bpf,644);
+ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
+ $(call do_install,btf.h,$(prefix)/include/bpf,644); \
+ $(call do_install,xsk.h,$(prefix)/include/bpf,644);
install: install_lib
@@ -219,7 +234,7 @@ config-clean:
clean:
$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
- *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS
+ *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
index 5788479384ca..cef7b77eab69 100644
--- a/tools/lib/bpf/README.rst
+++ b/tools/lib/bpf/README.rst
@@ -111,6 +111,7 @@ starting from ``0.0.1``.
Every time ABI is being changed, e.g. because a new symbol is added or
semantic of existing symbol is changed, ABI version should be bumped.
+This bump in ABI version is at most once per kernel development cycle.
For example, if current state of ``libbpf.map`` is:
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1b8d8cdd3575..cf119c9b6f27 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
/* Calculate type signature hash of ENUM. */
static __u32 btf_hash_enum(struct btf_type *t)
{
- struct btf_enum *member = (struct btf_enum *)(t + 1);
- __u32 vlen = BTF_INFO_VLEN(t->info);
- __u32 h = btf_hash_common(t);
- int i;
+ __u32 h;
- for (i = 0; i < vlen; i++) {
- h = hash_combine(h, member->name_off);
- h = hash_combine(h, member->val);
- member++;
- }
+ /* don't hash vlen and enum members to support enum fwd resolving */
+ h = hash_combine(0, t->name_off);
+ h = hash_combine(h, t->info & ~0xffff);
+ h = hash_combine(h, t->size);
return h;
}
@@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
return true;
}
+static inline bool btf_is_enum_fwd(struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM &&
+ BTF_INFO_VLEN(t->info) == 0;
+}
+
+static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+ return btf_equal_enum(t1, t2);
+ /* ignore vlen when comparing */
+ return t1->name_off == t2->name_off &&
+ (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+ t1->size == t2->size;
+}
+
/*
* Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
* as referenced type IDs equivalence is established separately during type
@@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
new_id = cand_node->type_id;
break;
}
+ if (d->opts.dont_resolve_fwds)
+ continue;
+ if (btf_compat_enum(t, cand)) {
+ if (btf_is_enum_fwd(t)) {
+ /* resolve fwd to full enum */
+ new_id = cand_node->type_id;
+ break;
+ }
+ /* resolve canonical enum fwd to full enum */
+ d->map[cand_node->type_id] = type_id;
+ }
}
break;
@@ -2084,7 +2107,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return fwd_kind == real_kind;
}
- if (cand_type->info != canon_type->info)
+ if (cand_kind != canon_kind)
return 0;
switch (cand_kind) {
@@ -2092,7 +2115,10 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_equal_int(cand_type, canon_type);
case BTF_KIND_ENUM:
- return btf_equal_enum(cand_type, canon_type);
+ if (d->opts.dont_resolve_fwds)
+ return btf_equal_enum(cand_type, canon_type);
+ else
+ return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
return btf_equal_common(cand_type, canon_type);
@@ -2103,6 +2129,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ if (cand_type->info != canon_type->info)
+ return 0;
return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
case BTF_KIND_ARRAY: {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e6ad87512519..11c25d9ea431 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -840,12 +840,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
obj->efile.maps_shndx = idx;
else if (strcmp(name, BTF_ELF_SEC) == 0) {
obj->btf = btf__new(data->d_buf, data->d_size);
- if (IS_ERR(obj->btf) || btf__load(obj->btf)) {
+ if (IS_ERR(obj->btf)) {
pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
BTF_ELF_SEC, PTR_ERR(obj->btf));
- if (!IS_ERR(obj->btf))
- btf__free(obj->btf);
obj->btf = NULL;
+ continue;
+ }
+ err = btf__load(obj->btf);
+ if (err) {
+ pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n",
+ BTF_ELF_SEC, err);
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ err = 0;
}
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = data;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index f98ac82c9aea..8d0078b65486 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -126,8 +126,8 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
cfg->frame_headroom = usr_cfg->frame_headroom;
}
-static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
- const struct xsk_socket_config *usr_cfg)
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+ const struct xsk_socket_config *usr_cfg)
{
if (!usr_cfg) {
cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
@@ -135,14 +135,19 @@ static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
cfg->libbpf_flags = 0;
cfg->xdp_flags = 0;
cfg->bind_flags = 0;
- return;
+ return 0;
}
+ if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
+ return -EINVAL;
+
cfg->rx_size = usr_cfg->rx_size;
cfg->tx_size = usr_cfg->tx_size;
cfg->libbpf_flags = usr_cfg->libbpf_flags;
cfg->xdp_flags = usr_cfg->xdp_flags;
cfg->bind_flags = usr_cfg->bind_flags;
+
+ return 0;
}
int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
@@ -557,7 +562,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
}
strncpy(xsk->ifname, ifname, IFNAMSIZ);
- xsk_set_xdp_socket_config(&xsk->config, usr_config);
+ err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+ if (err)
+ goto out_socket;
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index c9d038f91af6..53f8be0f4a1f 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -25,14 +25,17 @@ LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a
OBJTOOL := $(OUTPUT)objtool
OBJTOOL_IN := $(OBJTOOL)-in.o
+LIBELF_FLAGS := $(shell pkg-config libelf --cflags 2>/dev/null)
+LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
+
all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/objtool/arch/$(ARCH)/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
-CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES)
-LDFLAGS += -lelf $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
+LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
# Allow old libelf to be used:
elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 01f7555fd933..e8c9f77e9010 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -481,8 +481,8 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_t
mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
-$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
- $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl)
+ $(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
mount_flags_array := $(beauty_outdir)/mount_flags_array.c
mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 2ae92fddb6d5..92ee0b4378d4 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,10 @@
334 common rseq __x64_sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
+424 common pidfd_send_signal __x64_sys_pidfd_send_signal
+425 common io_uring_setup __x64_sys_io_uring_setup
+426 common io_uring_enter __x64_sys_io_uring_enter
+427 common io_uring_register __x64_sys_io_uring_register
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 7b55613924de..c68ee06cae63 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -103,7 +103,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
-check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
# diff non-symmetric files
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index e38518cdcbc3..74ef92f1d19a 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -107,6 +107,7 @@ import os
from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtSql import *
+pyside_version_1 = True
from decimal import *
from ctypes import *
from multiprocessing import Process, Array, Value, Event
@@ -1526,6 +1527,19 @@ def BranchDataPrep(query):
" (" + dsoname(query.value(15)) + ")")
return data
+def BranchDataPrepWA(query):
+ data = []
+ data.append(query.value(0))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(1)))
+ for i in xrange(2, 8):
+ data.append(query.value(i))
+ data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+ " (" + dsoname(query.value(11)) + ")" + " -> " +
+ tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+ " (" + dsoname(query.value(15)) + ")")
+ return data
+
# Branch data model
class BranchModel(TreeModel):
@@ -1553,7 +1567,11 @@ class BranchModel(TreeModel):
" AND evsel_id = " + str(self.event_id) +
" ORDER BY samples.id"
" LIMIT " + str(glb_chunk_sz))
- self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+ if pyside_version_1 and sys.version_info[0] == 3:
+ prep = BranchDataPrepWA
+ else:
+ prep = BranchDataPrep
+ self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
self.fetcher.done.connect(self.Update)
self.fetcher.Fetch(glb_chunk_sz)
@@ -2079,14 +2097,6 @@ def IsSelectable(db, table, sql = ""):
return False
return True
-# SQL data preparation
-
-def SQLTableDataPrep(query, count):
- data = []
- for i in xrange(count):
- data.append(query.value(i))
- return data
-
# SQL table data model item
class SQLTableItem():
@@ -2110,7 +2120,7 @@ class SQLTableModel(TableModel):
self.more = True
self.populated = 0
self.column_headers = column_headers
- self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample)
+ self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): self.SQLTableDataPrep(x, y), self.AddSample)
self.fetcher.done.connect(self.Update)
self.fetcher.Fetch(glb_chunk_sz)
@@ -2154,6 +2164,12 @@ class SQLTableModel(TableModel):
def columnHeader(self, column):
return self.column_headers[column]
+ def SQLTableDataPrep(self, query, count):
+ data = []
+ for i in xrange(count):
+ data.append(query.value(i))
+ return data
+
# SQL automatic table data model
class SQLAutoTableModel(SQLTableModel):
@@ -2182,8 +2198,32 @@ class SQLAutoTableModel(SQLTableModel):
QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
while query.next():
column_headers.append(query.value(0))
+ if pyside_version_1 and sys.version_info[0] == 3:
+ if table_name == "samples_view":
+ self.SQLTableDataPrep = self.samples_view_DataPrep
+ if table_name == "samples":
+ self.SQLTableDataPrep = self.samples_DataPrep
super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent)
+ def samples_view_DataPrep(self, query, count):
+ data = []
+ data.append(query.value(0))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(1)))
+ for i in xrange(2, count):
+ data.append(query.value(i))
+ return data
+
+ def samples_DataPrep(self, query, count):
+ data = []
+ for i in xrange(9):
+ data.append(query.value(i))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(9)))
+ for i in xrange(10, count):
+ data.append(query.value(i))
+ return data
+
# Base class for custom ResizeColumnsToContents
class ResizeColumnsToContentsBase(QObject):
@@ -2868,9 +2908,13 @@ class LibXED():
ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
if not ok:
return 0, ""
+ if sys.version_info[0] == 2:
+ result = inst.buffer.value
+ else:
+ result = inst.buffer.value.decode()
# Return instruction length and the disassembled instruction text
# For now, assume the length is in byte 166
- return inst.xedd[166], inst.buffer.value
+ return inst.xedd[166], result
def TryOpen(file_name):
try:
@@ -2886,9 +2930,14 @@ def Is64Bit(f):
header = f.read(7)
f.seek(pos)
magic = header[0:4]
- eclass = ord(header[4])
- encoding = ord(header[5])
- version = ord(header[6])
+ if sys.version_info[0] == 2:
+ eclass = ord(header[4])
+ encoding = ord(header[5])
+ version = ord(header[6])
+ else:
+ eclass = header[4]
+ encoding = header[5]
+ version = header[6]
if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
result = True if eclass == 2 else False
return result
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
index 32bac9c0d694..5f5eefcb3c74 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -1,15 +1,18 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-if [ $# -ne 2 ] ; then
+if [ $# -ne 3 ] ; then
[ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+ linux_header_dir=tools/include/uapi/linux
header_dir=tools/include/uapi/asm-generic
arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
else
- header_dir=$1
- arch_header_dir=$2
+ linux_header_dir=$1
+ header_dir=$2
+ arch_header_dir=$3
fi
+linux_mman=${linux_header_dir}/mman.h
arch_mman=${arch_header_dir}/mman.h
# those in egrep -vw are flags, we want just the bits
@@ -20,6 +23,11 @@ egrep -q $regex ${arch_mman} && \
(egrep $regex ${arch_mman} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q $regex ${linux_mman} && \
+(egrep $regex ${linux_mman} | \
+ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
(egrep $regex ${header_dir}/mman-common.h | \
egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index ba4c623cd8de..39fe21e1cf93 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -387,6 +387,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
break;
case OCSD_INSTR_ISB:
case OCSD_INSTR_DSB_DMB:
+ case OCSD_INSTR_WFI_WFE:
case OCSD_INSTR_OTHER:
default:
packet->last_instr_taken_branch = false;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ec78e93085de..6689378ee577 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -231,35 +231,6 @@ void perf_evlist__set_leader(struct perf_evlist *evlist)
}
}
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr)
-{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .exclude_kernel = 1,
- .precise_ip = 3,
- };
-
- event_attr_init(&attr);
-
- /*
- * Unnamed union member, not supported as struct member named
- * initializer in older compilers such as gcc 4.4.7
- */
- attr.sample_period = 1;
-
- while (attr.precise_ip != 0) {
- int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
- if (fd != -1) {
- close(fd);
- break;
- }
- --attr.precise_ip;
- }
-
- pattr->precise_ip = attr.precise_ip;
-}
-
int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
struct perf_evsel *evsel = perf_evsel__new_cycles(precise);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dcb68f34d2cd..6a94785b9100 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -315,8 +315,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist,
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
-
struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7835e05f0c0a..66d066f18b5b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -295,7 +295,6 @@ struct perf_evsel *perf_evsel__new_cycles(bool precise)
if (!precise)
goto new_event;
- perf_event_attr__set_max_precise_ip(&attr);
/*
* Now let the usual logic to set up the perf_event_attr defaults
* to kick in when we return and before perf_evsel__open() is called.
@@ -305,6 +304,8 @@ new_event:
if (evsel == NULL)
goto out;
+ evsel->precise_max = true;
+
/* use asprintf() because free(evsel) assumes name is allocated */
if (asprintf(&evsel->name, "cycles%s%s%.*s",
(attr.precise_ip || attr.exclude_kernel) ? ":" : "",
@@ -1083,7 +1084,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
}
if (evsel->precise_max)
- perf_event_attr__set_max_precise_ip(attr);
+ attr->precise_ip = 3;
if (opts->all_user) {
attr->exclude_kernel = 1;
@@ -1749,6 +1750,59 @@ static bool ignore_missing_thread(struct perf_evsel *evsel,
return true;
}
+static void display_attr(struct perf_event_attr *attr)
+{
+ if (verbose >= 2) {
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ fprintf(stderr, "perf_event_attr:\n");
+ perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ }
+}
+
+static int perf_event_open(struct perf_evsel *evsel,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags)
+{
+ int precise_ip = evsel->attr.precise_ip;
+ int fd;
+
+ while (1) {
+ pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+ pid, cpu, group_fd, flags);
+
+ fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags);
+ if (fd >= 0)
+ break;
+
+ /*
+ * Do quick precise_ip fallback if:
+ * - there is precise_ip set in perf_event_attr
+ * - maximum precise is requested
+ * - sys_perf_event_open failed with ENOTSUP error,
+ * which is associated with wrong precise_ip
+ */
+ if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP))
+ break;
+
+ /*
+ * We tried all the precise_ip values, and it's
+ * still failing, so leave it to standard fallback.
+ */
+ if (!evsel->attr.precise_ip) {
+ evsel->attr.precise_ip = precise_ip;
+ break;
+ }
+
+ pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
+ evsel->attr.precise_ip--;
+ pr_debug2("decreasing precise_ip by one (%d)\n", evsel->attr.precise_ip);
+ display_attr(&evsel->attr);
+ }
+
+ return fd;
+}
+
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads)
{
@@ -1824,12 +1878,7 @@ retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
- if (verbose >= 2) {
- fprintf(stderr, "%.60s\n", graph_dotted_line);
- fprintf(stderr, "perf_event_attr:\n");
- perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
- fprintf(stderr, "%.60s\n", graph_dotted_line);
- }
+ display_attr(&evsel->attr);
for (cpu = 0; cpu < cpus->nr; cpu++) {
@@ -1841,13 +1890,10 @@ retry_sample_id:
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
- pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[cpu], group_fd, flags);
-
test_attr__ready();
- fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
- group_fd, flags);
+ fd = perf_event_open(evsel, pid, cpus->map[cpu],
+ group_fd, flags);
FD(evsel, cpu, thread) = fd;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 6e03db142091..872fab163585 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -251,19 +251,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
decoder->tsc_ctc_ratio_d;
-
- /*
- * Allow for timestamps appearing to backwards because a TSC
- * packet has slipped past a MTC packet, so allow 2 MTC ticks
- * or ...
- */
- decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
- decoder->tsc_ctc_ratio_n,
- decoder->tsc_ctc_ratio_d);
}
- /* ... or 0x100 paranoia */
- if (decoder->tsc_slip < 0x100)
- decoder->tsc_slip = 0x100;
+
+ /*
+ * A TSC packet can slip past MTC packets so that the timestamp appears
+ * to go backwards. One estimate is that can be up to about 40 CPU
+ * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+ * slippage an order of magnitude more to be on the safe side.
+ */
+ decoder->tsc_slip = 0x10000;
intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 61959aba7e27..3c520baa198c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1421,6 +1421,20 @@ static void machine__set_kernel_mmap(struct machine *machine,
machine->vmlinux_map->end = ~0ULL;
}
+static void machine__update_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ struct map *map = machine__kernel_map(machine);
+
+ map__get(map);
+ map_groups__remove(&machine->kmaps, map);
+
+ machine__set_kernel_mmap(machine, start, end);
+
+ map_groups__insert(&machine->kmaps, map);
+ map__put(map);
+}
+
int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
@@ -1453,17 +1467,11 @@ int machine__create_kernel_maps(struct machine *machine)
goto out_put;
}
- /* we have a real start address now, so re-order the kmaps */
- map = machine__kernel_map(machine);
-
- map__get(map);
- map_groups__remove(&machine->kmaps, map);
-
- /* assume it's the last in the kmaps */
- machine__set_kernel_mmap(machine, addr, ~0ULL);
-
- map_groups__insert(&machine->kmaps, map);
- map__put(map);
+ /*
+ * we have a real start address now, so re-order the kmaps
+ * assume it's the last in the kmaps
+ */
+ machine__update_kernel_mmap(machine, addr, ~0ULL);
}
if (machine__create_extra_kernel_maps(machine, kernel))
@@ -1599,7 +1607,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (strstr(kernel->long_name, "vmlinux"))
dso__set_short_name(kernel, "[kernel.vmlinux]", false);
- machine__set_kernel_mmap(machine, event->mmap.start,
+ machine__update_kernel_mmap(machine, event->mmap.start,
event->mmap.start + event->mmap.len);
/*
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6199a3174ab9..e0429f4ef335 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -732,10 +732,20 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
+
+ /*
+ * uncore alias may be from different PMU
+ * with common prefix
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
+
if (strcmp(pname, name))
continue;
}
+new_alias:
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
(char *)pe->desc, (char *)pe->event,
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index c3fad065c89c..c7727be9719f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -44,6 +44,7 @@
#include <cpuid.h>
#include <linux/capability.h>
#include <errno.h>
+#include <math.h>
char *proc_stat = "/proc/stat";
FILE *outf;
@@ -63,7 +64,6 @@ unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
-unsigned int do_cnl_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
@@ -141,9 +141,21 @@ unsigned int first_counter_read = 1;
#define RAPL_CORES_ENERGY_STATUS (1 << 9)
/* 0x639 MSR_PP0_ENERGY_STATUS */
+#define RAPL_PER_CORE_ENERGY (1 << 10)
+ /* Indicates cores energy collection is per-core,
+ * not per-package. */
+#define RAPL_AMD_F17H (1 << 11)
+ /* 0xc0010299 MSR_RAPL_PWR_UNIT */
+ /* 0xc001029a MSR_CORE_ENERGY_STAT */
+ /* 0xc001029b MSR_PKG_ENERGY_STAT */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define TJMAX_DEFAULT 100
+/* MSRs that are not yet in the kernel-provided header. */
+#define MSR_RAPL_PWR_UNIT 0xc0010299
+#define MSR_CORE_ENERGY_STAT 0xc001029a
+#define MSR_PKG_ENERGY_STAT 0xc001029b
+
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/*
@@ -187,6 +199,7 @@ struct core_data {
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
+ unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
@@ -273,6 +286,7 @@ struct system_summary {
struct cpu_topology {
int physical_package_id;
+ int die_id;
int logical_cpu_id;
int physical_node_id;
int logical_node_id; /* 0-based count within the package */
@@ -283,6 +297,7 @@ struct cpu_topology {
struct topo_params {
int num_packages;
+ int num_die;
int num_cpus;
int num_cores;
int max_cpu_num;
@@ -314,9 +329,8 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg
int retval, pkg_no, core_no, thread_no, node_no;
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
- for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
- for (node_no = 0; node_no < topo.nodes_per_pkg;
- node_no++) {
+ for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
+ for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
for (thread_no = 0; thread_no <
topo.threads_per_core; ++thread_no) {
struct thread_data *t;
@@ -442,6 +456,7 @@ struct msr_counter bic[] = {
{ 0x0, "CPU" },
{ 0x0, "APIC" },
{ 0x0, "X2APIC" },
+ { 0x0, "Die" },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -495,6 +510,7 @@ struct msr_counter bic[] = {
#define BIC_CPU (1ULL << 47)
#define BIC_APIC (1ULL << 48)
#define BIC_X2APIC (1ULL << 49)
+#define BIC_Die (1ULL << 50)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -621,6 +637,8 @@ void print_header(char *delim)
outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
if (DO_BIC(BIC_Package))
outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Die))
+ outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
if (DO_BIC(BIC_Node))
outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
if (DO_BIC(BIC_Core))
@@ -667,7 +685,7 @@ void print_header(char *delim)
if (DO_BIC(BIC_CPU_c1))
outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
- if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
+ if (DO_BIC(BIC_CPU_c3))
outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
if (DO_BIC(BIC_CPU_c6))
outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
@@ -680,6 +698,14 @@ void print_header(char *delim)
if (DO_BIC(BIC_CoreTmp))
outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
+ if (do_rapl && !rapl_joules) {
+ if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
+ } else if (do_rapl && rapl_joules) {
+ if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
+ }
+
for (mp = sys.cp; mp; mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 64)
@@ -734,7 +760,7 @@ void print_header(char *delim)
if (do_rapl && !rapl_joules) {
if (DO_BIC(BIC_PkgWatt))
outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
- if (DO_BIC(BIC_CorWatt))
+ if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
if (DO_BIC(BIC_GFXWatt))
outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
@@ -747,7 +773,7 @@ void print_header(char *delim)
} else if (do_rapl && rapl_joules) {
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
- if (DO_BIC(BIC_Cor_J))
+ if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
@@ -808,6 +834,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+ outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
@@ -904,6 +931,8 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (t == &average.threads) {
if (DO_BIC(BIC_Package))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Die))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_Node))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_Core))
@@ -921,6 +950,12 @@ int format_counters(struct thread_data *t, struct core_data *c,
else
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
+ if (DO_BIC(BIC_Die)) {
+ if (c)
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
+ else
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_Node)) {
if (t)
outp += sprintf(outp, "%s%d",
@@ -1003,7 +1038,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
+ if (DO_BIC(BIC_CPU_c3))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
if (DO_BIC(BIC_CPU_c6))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
@@ -1033,6 +1068,20 @@ int format_counters(struct thread_data *t, struct core_data *c,
}
}
+ /*
+ * If measurement interval exceeds minimum RAPL Joule Counter range,
+ * indicate that results are suspect by printing "**" in fraction place.
+ */
+ if (interval_float < rapl_joule_counter_range)
+ fmt8 = "%s%.2f";
+ else
+ fmt8 = "%6.0f**";
+
+ if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+
/* print per-package data only for 1st core in package */
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
goto done;
@@ -1085,18 +1134,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (DO_BIC(BIC_SYS_LPI))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
- /*
- * If measurement interval exceeds minimum RAPL Joule Counter range,
- * indicate that results are suspect by printing "**" in fraction place.
- */
- if (interval_float < rapl_joule_counter_range)
- fmt8 = "%s%.2f";
- else
- fmt8 = "%6.0f**";
-
if (DO_BIC(BIC_PkgWatt))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
- if (DO_BIC(BIC_CorWatt))
+ if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
if (DO_BIC(BIC_GFXWatt))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
@@ -1104,7 +1144,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
- if (DO_BIC(BIC_Cor_J))
+ if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
@@ -1249,6 +1289,8 @@ delta_core(struct core_data *new, struct core_data *old)
old->core_temp_c = new->core_temp_c;
old->mc6_us = new->mc6_us - old->mc6_us;
+ DELTA_WRAP32(new->core_energy, old->core_energy);
+
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
old->counter[i] = new->counter[i];
@@ -1391,6 +1433,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
+ c->core_energy = 0;
p->pkg_wtd_core_c0 = 0;
p->pkg_any_core_c0 = 0;
@@ -1473,6 +1516,8 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
+ average.cores.core_energy += c->core_energy;
+
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
continue;
@@ -1818,7 +1863,7 @@ retry:
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
+ if (DO_BIC(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
@@ -1845,6 +1890,12 @@ retry:
c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
}
+ if (do_rapl & RAPL_AMD_F17H) {
+ if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
+ return -14;
+ c->core_energy = msr & 0xFFFFFFFF;
+ }
+
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &c->counter[i]))
return -10;
@@ -1934,6 +1985,11 @@ retry:
return -16;
p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
}
+ if (do_rapl & RAPL_AMD_F17H) {
+ if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
+ return -13;
+ p->energy_pkg = msr & 0xFFFFFFFF;
+ }
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
@@ -2456,6 +2512,8 @@ void free_all_buffers(void)
/*
* Parse a file containing a single int.
+ * Return 0 if file can not be opened
+ * Exit if file can be opened, but can not be parsed
*/
int parse_int_file(const char *fmt, ...)
{
@@ -2467,7 +2525,9 @@ int parse_int_file(const char *fmt, ...)
va_start(args, fmt);
vsnprintf(path, sizeof(path), fmt, args);
va_end(args);
- filep = fopen_or_die(path, "r");
+ filep = fopen(path, "r");
+ if (!filep)
+ return 0;
if (fscanf(filep, "%d", &value) != 1)
err(1, "%s: failed to parse number from file", path);
fclose(filep);
@@ -2488,6 +2548,11 @@ int get_physical_package_id(int cpu)
return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
}
+int get_die_id(int cpu)
+{
+ return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
+}
+
int get_core_id(int cpu)
{
return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
@@ -2578,7 +2643,8 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
filep = fopen_or_die(path, "r");
do {
offset -= BITMASK_SIZE;
- fscanf(filep, "%lx%c", &map, &character);
+ if (fscanf(filep, "%lx%c", &map, &character) != 2)
+ err(1, "%s: failed to parse file", path);
for (shift = 0; shift < BITMASK_SIZE; shift++) {
if ((map >> shift) & 0x1) {
so = shift + offset;
@@ -2855,8 +2921,11 @@ int snapshot_cpu_lpi_us(void)
fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
- if (retval != 1)
- err(1, "CPU LPI");
+ if (retval != 1) {
+ fprintf(stderr, "Disabling Low Power Idle CPU output\n");
+ BIC_NOT_PRESENT(BIC_CPU_LPI);
+ return -1;
+ }
fclose(fp);
@@ -2878,9 +2947,11 @@ int snapshot_sys_lpi_us(void)
fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
- if (retval != 1)
- err(1, "SYS LPI");
-
+ if (retval != 1) {
+ fprintf(stderr, "Disabling Low Power Idle System output\n");
+ BIC_NOT_PRESENT(BIC_SYS_LPI);
+ return -1;
+ }
fclose(fp);
return 0;
@@ -3410,14 +3481,14 @@ dump_sysfs_cstate_config(void)
input = fopen(path, "r");
if (input == NULL)
continue;
- fgets(name_buf, sizeof(name_buf), input);
+ if (!fgets(name_buf, sizeof(name_buf), input))
+ err(1, "%s: failed to read file", path);
/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
sp = strchr(name_buf, '-');
if (!sp)
sp = strchrnul(name_buf, '\n');
*sp = '\0';
-
fclose(input);
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
@@ -3425,7 +3496,8 @@ dump_sysfs_cstate_config(void)
input = fopen(path, "r");
if (input == NULL)
continue;
- fgets(desc, sizeof(desc), input);
+ if (!fgets(desc, sizeof(desc), input))
+ err(1, "%s: failed to read file", path);
fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
fclose(input);
@@ -3444,20 +3516,22 @@ dump_sysfs_pstate_config(void)
base_cpu);
input = fopen(path, "r");
if (input == NULL) {
- fprintf(stderr, "NSFOD %s\n", path);
+ fprintf(outf, "NSFOD %s\n", path);
return;
}
- fgets(driver_buf, sizeof(driver_buf), input);
+ if (!fgets(driver_buf, sizeof(driver_buf), input))
+ err(1, "%s: failed to read file", path);
fclose(input);
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
base_cpu);
input = fopen(path, "r");
if (input == NULL) {
- fprintf(stderr, "NSFOD %s\n", path);
+ fprintf(outf, "NSFOD %s\n", path);
return;
}
- fgets(governor_buf, sizeof(governor_buf), input);
+ if (!fgets(governor_buf, sizeof(governor_buf), input))
+ err(1, "%s: failed to read file", path);
fclose(input);
fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
@@ -3466,7 +3540,8 @@ dump_sysfs_pstate_config(void)
sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
input = fopen(path, "r");
if (input != NULL) {
- fscanf(input, "%d", &turbo);
+ if (fscanf(input, "%d", &turbo) != 1)
+ err(1, "%s: failed to parse number from file", path);
fprintf(outf, "cpufreq boost: %d\n", turbo);
fclose(input);
}
@@ -3474,7 +3549,8 @@ dump_sysfs_pstate_config(void)
sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
input = fopen(path, "r");
if (input != NULL) {
- fscanf(input, "%d", &turbo);
+ if (fscanf(input, "%d", &turbo) != 1)
+ err(1, "%s: failed to parse number from file", path);
fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
fclose(input);
}
@@ -3718,7 +3794,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
-double get_tdp(unsigned int model)
+double get_tdp_intel(unsigned int model)
{
unsigned long long msr;
@@ -3735,6 +3811,16 @@ double get_tdp(unsigned int model)
}
}
+double get_tdp_amd(unsigned int family)
+{
+ switch (family) {
+ case 0x17:
+ default:
+ /* This is the max stock TDP of HEDT/Server Fam17h chips */
+ return 250.0;
+ }
+}
+
/*
* rapl_dram_energy_units_probe()
* Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
@@ -3754,21 +3840,12 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units)
}
}
-
-/*
- * rapl_probe()
- *
- * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
- */
-void rapl_probe(unsigned int family, unsigned int model)
+void rapl_probe_intel(unsigned int family, unsigned int model)
{
unsigned long long msr;
unsigned int time_unit;
double tdp;
- if (!genuine_intel)
- return;
-
if (family != 6)
return;
@@ -3892,13 +3969,69 @@ void rapl_probe(unsigned int family, unsigned int model)
rapl_time_units = 1.0 / (1 << (time_unit));
- tdp = get_tdp(model);
+ tdp = get_tdp_intel(model);
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
+}
- return;
+void rapl_probe_amd(unsigned int family, unsigned int model)
+{
+ unsigned long long msr;
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int has_rapl = 0;
+ double tdp;
+
+ if (max_extended_level >= 0x80000007) {
+ __cpuid(0x80000007, eax, ebx, ecx, edx);
+ /* RAPL (Fam 17h) */
+ has_rapl = edx & (1 << 14);
+ }
+
+ if (!has_rapl)
+ return;
+
+ switch (family) {
+ case 0x17: /* Zen, Zen+ */
+ do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ }
+ break;
+ default:
+ return;
+ }
+
+ if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
+ return;
+
+ rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
+ rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
+ rapl_power_units = ldexp(1.0, -(msr & 0xf));
+
+ tdp = get_tdp_amd(model);
+
+ rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+ if (!quiet)
+ fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
+}
+
+/*
+ * rapl_probe()
+ *
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
+ */
+void rapl_probe(unsigned int family, unsigned int model)
+{
+ if (genuine_intel)
+ rapl_probe_intel(family, model);
+ if (authentic_amd)
+ rapl_probe_amd(family, model);
}
void perf_limit_reasons_probe(unsigned int family, unsigned int model)
@@ -4003,6 +4136,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
+ const char *msr_name;
int cpu;
if (!do_rapl)
@@ -4018,10 +4152,17 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -1;
}
- if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
- return -1;
+ if (do_rapl & RAPL_AMD_F17H) {
+ msr_name = "MSR_RAPL_PWR_UNIT";
+ if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
+ return -1;
+ } else {
+ msr_name = "MSR_RAPL_POWER_UNIT";
+ if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
+ return -1;
+ }
- fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
+ fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
rapl_power_units, rapl_energy_units, rapl_time_units);
if (do_rapl & RAPL_PKG_POWER_INFO) {
@@ -4451,6 +4592,9 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
return INTEL_FAM6_SKYLAKE_MOBILE;
+
+ case INTEL_FAM6_ICELAKE_MOBILE:
+ return INTEL_FAM6_CANNONLAKE_MOBILE;
}
return model;
}
@@ -4702,7 +4846,9 @@ void process_cpuid()
}
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
- do_cnl_cstates = is_cnl(family, model);
+
+ if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
+ BIC_NOT_PRESENT(BIC_CPU_c3);
if (!quiet)
decode_misc_pwr_mgmt_msr();
@@ -4769,6 +4915,7 @@ void topology_probe()
int i;
int max_core_id = 0;
int max_package_id = 0;
+ int max_die_id = 0;
int max_siblings = 0;
/* Initialize num_cpus, max_cpu_num */
@@ -4835,6 +4982,11 @@ void topology_probe()
if (cpus[i].physical_package_id > max_package_id)
max_package_id = cpus[i].physical_package_id;
+ /* get die information */
+ cpus[i].die_id = get_die_id(i);
+ if (cpus[i].die_id > max_die_id)
+ max_die_id = cpus[i].die_id;
+
/* get numa node information */
cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
if (cpus[i].physical_node_id > topo.max_node_num)
@@ -4860,6 +5012,13 @@ void topology_probe()
if (!summary_only && topo.cores_per_node > 1)
BIC_PRESENT(BIC_Core);
+ topo.num_die = max_die_id + 1;
+ if (debug > 1)
+ fprintf(outf, "max_die_id %d, sizing for %d die\n",
+ max_die_id, topo.num_die);
+ if (!summary_only && topo.num_die > 1)
+ BIC_PRESENT(BIC_Die);
+
topo.num_packages = max_package_id + 1;
if (debug > 1)
fprintf(outf, "max_package_id %d, sizing for %d packages\n",
@@ -4884,8 +5043,8 @@ void topology_probe()
if (cpu_is_not_present(i))
continue;
fprintf(outf,
- "cpu %d pkg %d node %d lnode %d core %d thread %d\n",
- i, cpus[i].physical_package_id,
+ "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
+ i, cpus[i].physical_package_id, cpus[i].die_id,
cpus[i].physical_node_id,
cpus[i].logical_node_id,
cpus[i].physical_core_id,
@@ -5122,7 +5281,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 18.07.27"
+ fprintf(outf, "turbostat version 19.03.20"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -5319,7 +5478,8 @@ void probe_sysfs(void)
input = fopen(path, "r");
if (input == NULL)
continue;
- fgets(name_buf, sizeof(name_buf), input);
+ if (!fgets(name_buf, sizeof(name_buf), input))
+ err(1, "%s: failed to read file", path);
/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
sp = strchr(name_buf, '-');
@@ -5346,7 +5506,8 @@ void probe_sysfs(void)
input = fopen(path, "r");
if (input == NULL)
continue;
- fgets(name_buf, sizeof(name_buf), input);
+ if (!fgets(name_buf, sizeof(name_buf), input))
+ err(1, "%s: failed to read file", path);
/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
sp = strchr(name_buf, '-');
if (!sp)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index c9433a496d54..c81fc350f7ad 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_sk_fullsock;
static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
(void *) BPF_FUNC_tcp_sock;
+static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
+ (void *) BPF_FUNC_get_listener_sock;
static int (*bpf_skb_ecn_set_ce)(void *ctx) =
(void *) BPF_FUNC_skb_ecn_set_ce;
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index bcbd928c96ab..fc818bc1d729 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -39,6 +39,58 @@ static struct bpf_flow_keys pkt_v6_flow_keys = {
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
};
+#define VLAN_HLEN 4
+
+static struct {
+ struct ethhdr eth;
+ __u16 vlan_tci;
+ __u16 vlan_proto;
+ struct iphdr iph;
+ struct tcphdr tcp;
+} __packed pkt_vlan_v4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q),
+ .vlan_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+ .tcp.doff = 5,
+};
+
+static struct bpf_flow_keys pkt_vlan_v4_flow_keys = {
+ .nhoff = VLAN_HLEN,
+ .thoff = VLAN_HLEN + sizeof(struct iphdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+};
+
+static struct {
+ struct ethhdr eth;
+ __u16 vlan_tci;
+ __u16 vlan_proto;
+ __u16 vlan_tci2;
+ __u16 vlan_proto2;
+ struct ipv6hdr iph;
+ struct tcphdr tcp;
+} __packed pkt_vlan_v6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD),
+ .vlan_proto = __bpf_constant_htons(ETH_P_8021Q),
+ .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+ .tcp.doff = 5,
+};
+
+static struct bpf_flow_keys pkt_vlan_v6_flow_keys = {
+ .nhoff = VLAN_HLEN * 2,
+ .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+};
+
void test_flow_dissector(void)
{
struct bpf_flow_keys flow_keys;
@@ -68,5 +120,21 @@ void test_flow_dissector(void)
err, errno, retval, duration, size, sizeof(flow_keys));
CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys);
+ err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4),
+ &flow_keys, &size, &retval, &duration);
+ CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4",
+ "err %d errno %d retval %d duration %d size %u/%lu\n",
+ err, errno, retval, duration, size, sizeof(flow_keys));
+ CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys,
+ pkt_vlan_v4_flow_keys);
+
+ err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6),
+ &flow_keys, &size, &retval, &duration);
+ CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6",
+ "err %d errno %d retval %d duration %d size %u/%lu\n",
+ err, errno, retval, duration, size, sizeof(flow_keys));
+ CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys,
+ pkt_vlan_v6_flow_keys);
+
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 90f8a206340a..ee99368c595c 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -37,7 +37,7 @@ void test_map_lock(void)
const char *file = "./test_map_lock.o";
int prog_fd, map_fd[2], vars[17] = {};
pthread_t thread_id[6];
- struct bpf_object *obj;
+ struct bpf_object *obj = NULL;
int err = 0, key = 0, i;
void *ret;
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 9a573a9675d7..114ebe6a438e 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -5,7 +5,7 @@ void test_spinlock(void)
{
const char *file = "./test_spin_lock.o";
pthread_t thread_id[4];
- struct bpf_object *obj;
+ struct bpf_object *obj = NULL;
int prog_fd;
int err = 0, i;
void *ret;
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 284660f5aa95..75b17cada539 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -92,7 +92,6 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
{
struct bpf_flow_keys *keys = skb->flow_keys;
- keys->n_proto = proto;
switch (proto) {
case bpf_htons(ETH_P_IP):
bpf_tail_call(skb, &jmp_table, IP);
@@ -119,10 +118,9 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
SEC("flow_dissector")
int _dissect(struct __sk_buff *skb)
{
- if (!skb->vlan_present)
- return parse_eth_proto(skb, skb->protocol);
- else
- return parse_eth_proto(skb, skb->vlan_proto);
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ return parse_eth_proto(skb, keys->n_proto);
}
/* Parses on IPPROTO_* */
@@ -336,15 +334,9 @@ PROG(VLAN)(struct __sk_buff *skb)
{
struct bpf_flow_keys *keys = skb->flow_keys;
struct vlan_hdr *vlan, _vlan;
- __be16 proto;
-
- /* Peek back to see if single or double-tagging */
- if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
- sizeof(proto)))
- return BPF_DROP;
/* Account for double-tagging */
- if (proto == bpf_htons(ETH_P_8021AD)) {
+ if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
if (!vlan)
return BPF_DROP;
@@ -352,6 +344,7 @@ PROG(VLAN)(struct __sk_buff *skb)
if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
return BPF_DROP;
+ keys->nhoff += sizeof(*vlan);
keys->thoff += sizeof(*vlan);
}
@@ -359,12 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb)
if (!vlan)
return BPF_DROP;
+ keys->nhoff += sizeof(*vlan);
keys->thoff += sizeof(*vlan);
/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
return BPF_DROP;
+ keys->n_proto = vlan->h_vlan_encapsulated_proto;
return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
}
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index de1a43e8f610..37328f148538 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -8,38 +8,51 @@
#include "bpf_helpers.h"
#include "bpf_endian.h"
-enum bpf_array_idx {
- SRV_IDX,
- CLI_IDX,
- __NR_BPF_ARRAY_IDX,
+enum bpf_addr_array_idx {
+ ADDR_SRV_IDX,
+ ADDR_CLI_IDX,
+ __NR_BPF_ADDR_ARRAY_IDX,
+};
+
+enum bpf_result_array_idx {
+ EGRESS_SRV_IDX,
+ EGRESS_CLI_IDX,
+ INGRESS_LISTEN_IDX,
+ __NR_BPF_RESULT_ARRAY_IDX,
+};
+
+enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") addr_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct sockaddr_in6),
- .max_entries = __NR_BPF_ARRAY_IDX,
+ .max_entries = __NR_BPF_ADDR_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_sock),
- .max_entries = __NR_BPF_ARRAY_IDX,
+ .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") tcp_sock_result_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct bpf_tcp_sock),
- .max_entries = __NR_BPF_ARRAY_IDX,
+ .max_entries = __NR_BPF_RESULT_ARRAY_IDX,
};
struct bpf_map_def SEC("maps") linum_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
- .max_entries = 1,
+ .max_entries = __NR_BPF_LINUM_ARRAY_IDX,
};
static bool is_loopback6(__u32 *a6)
@@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst,
#define RETURN { \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
return 1; \
}
SEC("cgroup_skb/egress")
-int read_sock_fields(struct __sk_buff *skb)
+int egress_read_sock_fields(struct __sk_buff *skb)
{
- __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx;
+ __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
- __u32 linum, idx0 = 0;
+ __u32 linum, linum_idx;
+
+ linum_idx = EGRESS_LINUM_IDX;
sk = skb->sk;
if (!sk || sk->state == 10)
@@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb)
RETURN;
if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
- idx = srv_idx;
+ result_idx = EGRESS_SRV_IDX;
else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
- idx = cli_idx;
+ result_idx = EGRESS_CLI_IDX;
else
RETURN;
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx);
+ sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
+ tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
+ if (!sk_ret || !tp_ret)
+ RETURN;
+
+ skcpy(sk_ret, sk);
+ tpcpy(tp_ret, tp);
+
+ RETURN;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_read_sock_fields(struct __sk_buff *skb)
+{
+ __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
+ struct bpf_tcp_sock *tp, *tp_ret;
+ struct bpf_sock *sk, *sk_ret;
+ struct sockaddr_in6 *srv_sa6;
+ __u32 linum, linum_idx;
+
+ linum_idx = INGRESS_LINUM_IDX;
+
+ sk = skb->sk;
+ if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
+ RETURN;
+
+ srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
+ if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
+ RETURN;
+
+ if (sk->state != 10 && sk->state != 12)
+ RETURN;
+
+ sk = bpf_get_listener_sock(sk);
+ if (!sk)
+ RETURN;
+
+ tp = bpf_tcp_sock(sk);
+ if (!tp)
+ RETURN;
+
+ sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
+ tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
if (!sk_ret || !tp_ret)
RETURN;
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 38797aa627a7..ec5794e4205b 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5777,6 +5777,53 @@ const struct btf_dedup_test dedup_tests[] = {
},
},
{
+ .descr = "dedup: void equiv check",
+ /*
+ * // CU 1:
+ * struct s {
+ * struct {} *x;
+ * };
+ * // CU 2:
+ * struct s {
+ * int *x;
+ * };
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */
+ BTF_PTR_ENC(1), /* [2] ptr -> [1] */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ /* CU 2 */
+ BTF_PTR_ENC(0), /* [4] ptr -> void */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */
+ BTF_MEMBER_ENC(NAME_NTH(2), 4, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0s\0x"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */
+ BTF_PTR_ENC(1), /* [2] ptr -> [1] */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ /* CU 2 */
+ BTF_PTR_ENC(0), /* [4] ptr -> void */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */
+ BTF_MEMBER_ENC(NAME_NTH(2), 4, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0s\0x"),
+ },
+ .opts = {
+ .dont_resolve_fwds = false,
+ .dedup_table_size = 1, /* force hash collisions */
+ },
+},
+{
.descr = "dedup: all possible kinds (no duplicates)",
.input = {
.raw_types = {
@@ -5874,6 +5921,50 @@ const struct btf_dedup_test dedup_tests[] = {
.dont_resolve_fwds = false,
},
},
+{
+ .descr = "dedup: enum fwd resolution",
+ .input = {
+ .raw_types = {
+ /* [1] fwd enum 'e1' before full enum */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+ /* [2] full enum 'e1' after fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 123),
+ /* [3] full enum 'e2' before fwd */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(4), 456),
+ /* [4] fwd enum 'e2' after full enum */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+ /* [5] incompatible fwd enum with different size */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
+ /* [6] incompatible full enum with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] full enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 123),
+ /* [2] full enum 'e2' */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(4), 456),
+ /* [3] incompatible fwd enum with different size */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
+ /* [4] incompatible full enum with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+ .opts = {
+ .dont_resolve_fwds = false,
+ },
+},
};
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index bc8943938bf5..dcae7f664dce 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -16,10 +16,23 @@
#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
-enum bpf_array_idx {
- SRV_IDX,
- CLI_IDX,
- __NR_BPF_ARRAY_IDX,
+enum bpf_addr_array_idx {
+ ADDR_SRV_IDX,
+ ADDR_CLI_IDX,
+ __NR_BPF_ADDR_ARRAY_IDX,
+};
+
+enum bpf_result_array_idx {
+ EGRESS_SRV_IDX,
+ EGRESS_CLI_IDX,
+ INGRESS_LISTEN_IDX,
+ __NR_BPF_RESULT_ARRAY_IDX,
+};
+
+enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
};
#define CHECK(condition, tag, format...) ({ \
@@ -41,8 +54,16 @@ static int linum_map_fd;
static int addr_map_fd;
static int tp_map_fd;
static int sk_map_fd;
-static __u32 srv_idx = SRV_IDX;
-static __u32 cli_idx = CLI_IDX;
+
+static __u32 addr_srv_idx = ADDR_SRV_IDX;
+static __u32 addr_cli_idx = ADDR_CLI_IDX;
+
+static __u32 egress_srv_idx = EGRESS_SRV_IDX;
+static __u32 egress_cli_idx = EGRESS_CLI_IDX;
+static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
+
+static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
static void init_loopback6(struct sockaddr_in6 *sa6)
{
@@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp)
static void check_result(void)
{
- struct bpf_tcp_sock srv_tp, cli_tp;
- struct bpf_sock srv_sk, cli_sk;
- __u32 linum, idx0 = 0;
+ struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+ struct bpf_sock srv_sk, cli_sk, listen_sk;
+ __u32 ingress_linum, egress_linum;
int err;
- err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum);
+ err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
+ &egress_linum);
CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)",
+ err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
+ &ingress_linum);
+ CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d", err, errno);
+
+ err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
+ CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
+ "err:%d errno:%d", err, errno);
+ err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
+ CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
+ "err:%d errno:%d", err, errno);
+
+ err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
+ CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
"err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)",
+ err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
+ CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
"err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)",
+ err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
+ CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
"err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)",
+ err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
+ CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
"err:%d errno:%d", err, errno);
+ printf("listen_sk: ");
+ print_sk(&listen_sk);
+ printf("\n");
+
printf("srv_sk: ");
print_sk(&srv_sk);
printf("\n");
@@ -124,6 +162,10 @@ static void check_result(void)
print_sk(&cli_sk);
printf("\n");
+ printf("listen_tp: ");
+ print_tp(&listen_tp);
+ printf("\n");
+
printf("srv_tp: ");
print_tp(&srv_tp);
printf("\n");
@@ -132,6 +174,19 @@ static void check_result(void)
print_tp(&cli_tp);
printf("\n");
+ CHECK(listen_sk.state != 10 ||
+ listen_sk.family != AF_INET6 ||
+ listen_sk.protocol != IPPROTO_TCP ||
+ memcmp(listen_sk.src_ip6, &in6addr_loopback,
+ sizeof(listen_sk.src_ip6)) ||
+ listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
+ listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
+ listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ listen_sk.dst_port,
+ "Unexpected listen_sk",
+ "Check listen_sk output. ingress_linum:%u",
+ ingress_linum);
+
CHECK(srv_sk.state == 10 ||
!srv_sk.state ||
srv_sk.family != AF_INET6 ||
@@ -142,7 +197,8 @@ static void check_result(void)
sizeof(srv_sk.dst_ip6)) ||
srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
srv_sk.dst_port != cli_sa6.sin6_port,
- "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum);
+ "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
+ egress_linum);
CHECK(cli_sk.state == 10 ||
!cli_sk.state ||
@@ -154,21 +210,31 @@ static void check_result(void)
sizeof(cli_sk.dst_ip6)) ||
cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
cli_sk.dst_port != srv_sa6.sin6_port,
- "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum);
+ "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
+ egress_linum);
+
+ CHECK(listen_tp.data_segs_out ||
+ listen_tp.data_segs_in ||
+ listen_tp.total_retrans ||
+ listen_tp.bytes_acked,
+ "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
+ ingress_linum);
CHECK(srv_tp.data_segs_out != 1 ||
srv_tp.data_segs_in ||
srv_tp.snd_cwnd != 10 ||
srv_tp.total_retrans ||
srv_tp.bytes_acked != DATA_LEN,
- "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum);
+ "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
+ egress_linum);
CHECK(cli_tp.data_segs_out ||
cli_tp.data_segs_in != 1 ||
cli_tp.snd_cwnd != 10 ||
cli_tp.total_retrans ||
cli_tp.bytes_received != DATA_LEN,
- "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum);
+ "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
+ egress_linum);
}
static void test(void)
@@ -211,10 +277,10 @@ static void test(void)
err, errno);
/* Update addr_map with srv_sa6 and cli_sa6 */
- err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0);
+ err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
- err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0);
+ err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
CHECK(err, "map_update", "err:%d errno:%d", err, errno);
/* Connect from cli_sa6 to srv_sa6 */
@@ -273,9 +339,9 @@ int main(int argc, char **argv)
struct bpf_prog_load_attr attr = {
.file = "test_sock_fields_kern.o",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .expected_attach_type = BPF_CGROUP_INET_EGRESS,
};
- int cgroup_fd, prog_fd, err;
+ int cgroup_fd, egress_fd, ingress_fd, err;
+ struct bpf_program *ingress_prog;
struct bpf_object *obj;
struct bpf_map *map;
@@ -293,12 +359,24 @@ int main(int argc, char **argv)
err = join_cgroup(TEST_CGROUP);
CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
- err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
+ ingress_prog = bpf_object__find_program_by_title(obj,
+ "cgroup_skb/ingress");
+ CHECK(!ingress_prog,
+ "bpf_object__find_program_by_title(cgroup_skb/ingress)",
+ "not found");
+ ingress_fd = bpf_program__fd(ingress_prog);
+
+ err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
"err:%d errno%d", err, errno);
+
+ err = bpf_prog_attach(ingress_fd, cgroup_fd,
+ BPF_CGROUP_INET_INGRESS, 0);
+ CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
+ "err:%d errno%d", err, errno);
close(cgroup_fd);
map = bpf_object__find_map_by_name(obj, "addr_map");
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 4004891afa9c..fb11240b758b 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -908,6 +908,44 @@
.result = REJECT,
},
{
+ "calls: stack depth check in dead code",
+ .insns = {
+ /* main */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), /* call B */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */
+ BPF_EXIT_INSN(),
+ /* C */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */
+ BPF_EXIT_INSN(),
+ /* D */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */
+ BPF_EXIT_INSN(),
+ /* E */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */
+ BPF_EXIT_INSN(),
+ /* F */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */
+ BPF_EXIT_INSN(),
+ /* G */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */
+ BPF_EXIT_INSN(),
+ /* H */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "call stack",
+ .result = REJECT,
+},
+{
"calls: spill into caller stack frame",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -1940,3 +1978,28 @@
.errstr = "!read_ok",
.result = REJECT,
},
+{
+ "calls: cross frame pruning - liveness propagation",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr = "!read_ok",
+ .result = REJECT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 3ed3593bd8b6..923f2110072d 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -605,3 +605,171 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
+{
+ "reference tracking: use ptr from bpf_tcp_sock() after release",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+},
+{
+ "reference tracking: use ptr from bpf_sk_fullsock() after release",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+},
+{
+ "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+},
+{
+ "reference tracking: use sk after bpf_sk_release(tp)",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+},
+{
+ "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+},
+{
+ "reference tracking: bpf_sk_release(listen_sk)",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "reference has not been acquired before",
+},
+{
+ /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
+ "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
+ .insns = {
+ BPF_SK_LOOKUP,
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid mem access",
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 0ddfdf76aba5..416436231fab 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -342,7 +342,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "type=sock_common expected=sock",
+ .errstr = "reference has not been acquired before",
},
{
"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
@@ -380,5 +380,5 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "type=tcp_sock expected=sock",
+ .errstr = "reference has not been acquired before",
},
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 2689d1ea6d7a..df1bf9230a74 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,9 +1,14 @@
/x86_64/cr4_cpuid_sync_test
/x86_64/evmcs_test
+/x86_64/hyperv_cpuid
+/x86_64/kvm_create_max_vcpus
/x86_64/platform_info_test
/x86_64/set_sregs_test
+/x86_64/smm_test
+/x86_64/state_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
+/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
-/x86_64/state_test
+/clear_dirty_log_test
/dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 3c1f4bdf9000..79c524395ebe 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,3 +1,5 @@
+include ../../../../scripts/Kbuild.include
+
all:
top_srcdir = ../../../..
@@ -17,6 +19,9 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86_64 += x86_64/smm_test
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
@@ -29,8 +34,12 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
-LDFLAGS += -pthread
+CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
+
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += -pthread $(no-pie-option)
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 4715cfba20dc..a29d1119ccb3 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -289,7 +289,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
guest_page_size = (1ul << guest_page_shift);
/* 1G of guest page sized pages */
- guest_num_pages = (1ul << (30 - guest_page_shift));
+ guest_num_pages = (1ul << (30 - guest_page_shift)) + 3;
host_page_size = getpagesize();
host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
!!((guest_num_pages * guest_page_size) % host_page_size);
@@ -311,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
#ifdef USE_CLEAR_DIRTY_LOG
struct kvm_enable_cap cap = {};
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
+ cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
cap.args[0] = 1;
vm_enable_cap(vm, &cap);
#endif
@@ -359,7 +359,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
#ifdef USE_CLEAR_DIRTY_LOG
kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
- DIV_ROUND_UP(host_num_pages, 64) * 64);
+ host_num_pages);
#endif
vm_dirty_log_verify(bmap);
iteration++;
@@ -427,7 +427,7 @@ int main(int argc, char *argv[])
int opt, i;
#ifdef USE_CLEAR_DIRTY_LOG
- if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) {
+ if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) {
fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index a84785b02557..8c6b9619797d 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -102,6 +102,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_mp_state *mp_state);
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
@@ -117,6 +118,10 @@ void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state);
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state, bool ignore_error);
const char *exit_reason_str(unsigned int exit_reason);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e2884c2b81ff..6063d5b2f356 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -778,6 +778,33 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index b52cfdefecbf..e9113857f44e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -91,6 +91,11 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
if (vm->kvm_fd < 0)
exit(KSFT_SKIP);
+ if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
+ fprintf(stderr, "immediate_exit not available, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
@@ -1121,6 +1126,22 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
return rc;
}
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ vcpu->state->immediate_exit = 1;
+ ret = ioctl(vcpu->fd, KVM_RUN, NULL);
+ vcpu->state->immediate_exit = 0;
+
+ TEST_ASSERT(ret == -1 && errno == EINTR,
+ "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
+ ret, errno);
+}
+
/*
* VM VCPU Set MP State
*
@@ -1229,6 +1250,38 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
ret, errno);
}
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
+ TEST_ASSERT(ret == 0,
+ "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
+ ret, errno);
+}
+
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state, bool ignore_error)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
+ if (!ignore_error) {
+ TEST_ASSERT(ret == 0,
+ "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
+ ret, errno);
+ }
+
+ return ret;
+}
+
/*
* VM VCPU System Regs Get
*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f28127f4a3af..dc7fae9fa424 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1030,6 +1030,14 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
nested_size, sizeof(state->nested_));
}
+ /*
+ * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+ * guest state is consistent only after userspace re-enters the
+ * kernel with KVM_RUN. Complete IO prior to migrating state
+ * to a new VM.
+ */
+ vcpu_run_complete_io(vm, vcpuid);
+
nmsrs = kvm_get_num_msrs(vm);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
@@ -1093,12 +1101,6 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
- }
-
r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
@@ -1130,4 +1132,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
r);
+
+ if (state->nested.size) {
+ r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+ r);
+ }
}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index d503a51fad30..7c2c4d4055a8 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -87,22 +87,25 @@ int main(int argc, char *argv[])
while (1) {
rc = _vcpu_run(vm, VCPU_ID);
- if (run->exit_reason == KVM_EXIT_IO) {
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_SYNC:
- /* emulate hypervisor clearing CR4.OSXSAVE */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
- sregs.cr4 &= ~X86_CR4_OSXSAVE;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
- break;
- case UCALL_ABORT:
- TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
- break;
- case UCALL_DONE:
- goto done;
- default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
- }
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ /* emulate hypervisor clearing CR4.OSXSAVE */
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.cr4 &= ~X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ break;
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index c49c2a28b0eb..36669684eca5 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -123,8 +123,6 @@ int main(int argc, char *argv[])
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
@@ -144,6 +142,9 @@ int main(int argc, char *argv[])
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
kvm_vm_release(vm);
/* Restore state in a new VM. */
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c
new file mode 100644
index 000000000000..50e92996f918
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c
@@ -0,0 +1,70 @@
+/*
+ * kvm_create_max_vcpus
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ printf("Testing creating %d vCPUs, with IDs %d...%d.\n",
+ num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
+
+ vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+ for (i = 0; i < num_vcpus; i++) {
+ int vcpu_id = first_vcpu_id + i;
+
+ /* This asserts that the vCPU was created. */
+ vm_vcpu_add(vm, vcpu_id, 0, 0);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
+ int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+
+ printf("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
+ printf("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
+
+ /*
+ * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
+ * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
+ * in this case.
+ */
+ if (!kvm_max_vcpu_id)
+ kvm_max_vcpu_id = kvm_max_vcpus;
+
+ TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus,
+ "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
+ kvm_max_vcpu_id, kvm_max_vcpus);
+
+ test_vcpu_creation(0, kvm_max_vcpus);
+
+ if (kvm_max_vcpu_id > kvm_max_vcpus)
+ test_vcpu_creation(
+ kvm_max_vcpu_id - kvm_max_vcpus, kvm_max_vcpus);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
new file mode 100644
index 000000000000..fb8086964d83
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID 1
+
+#define PAGE_SIZE 4096
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * This is compiled as normal 64-bit code, however, SMI handler is executed
+ * in real-address mode. To stay simple we're limiting ourselves to a mode
+ * independent subset of asm here.
+ * SMI handler always report back fixed stage SMRAM_STAGE.
+ */
+uint8_t smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
+void sync_with_host(uint64_t phase)
+{
+ asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+ : : "a" (phase));
+}
+
+void self_smi(void)
+{
+ wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+
+ sync_with_host(1);
+
+ wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+ sync_with_host(2);
+
+ self_smi();
+
+ sync_with_host(4);
+
+ if (vmx_pages) {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+ sync_with_host(5);
+
+ self_smi();
+
+ sync_with_host(7);
+ }
+
+ sync_with_host(DONE);
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages = NULL;
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int stage, stage_reported;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+ SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+ TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+ == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+ memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
+ memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+ sizeof(smi_handler));
+
+ vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ printf("will skip SMM test with VMX enabled\n");
+ vcpu_args_set(vm, VCPU_ID, 1, 0);
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ memset(&regs, 0, sizeof(regs));
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ stage_reported = regs.rax & 0xff;
+
+ if (stage_reported == DONE)
+ goto done;
+
+ TEST_ASSERT(stage_reported == stage ||
+ stage_reported == SMRAM_STAGE,
+ "Unexpected stage: #%x, got %x",
+ stage, stage_reported);
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ kvm_vm_release(vm);
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID, 0, 0);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 4b3f556265f1..e0a3c0204b7c 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -156,8 +156,6 @@ int main(int argc, char *argv[])
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
@@ -177,6 +175,9 @@ int main(int argc, char *argv[])
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
kvm_vm_release(vm);
/* Restore state in a new VM. */
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
new file mode 100644
index 000000000000..61a2163cf9f1
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -0,0 +1,280 @@
+/*
+ * vmx_set_nested_state_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <errno.h>
+#include <linux/kvm.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+/*
+ * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
+ * changes this should be updated.
+ */
+#define VMCS12_REVISION 0x11e57ed0
+#define VCPU_ID 5
+
+void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+{
+ volatile struct kvm_run *run;
+
+ vcpu_nested_state_set(vm, VCPU_ID, state, false);
+ run = vcpu_state(vm, VCPU_ID);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+}
+
+void test_nested_state_expect_errno(struct kvm_vm *vm,
+ struct kvm_nested_state *state,
+ int expected_errno)
+{
+ volatile struct kvm_run *run;
+ int rv;
+
+ rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
+ TEST_ASSERT(rv == -1 && errno == expected_errno,
+ "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
+ strerror(expected_errno), expected_errno, rv, strerror(errno),
+ errno);
+ run = vcpu_state(vm, VCPU_ID);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+}
+
+void test_nested_state_expect_einval(struct kvm_vm *vm,
+ struct kvm_nested_state *state)
+{
+ test_nested_state_expect_errno(vm, state, EINVAL);
+}
+
+void test_nested_state_expect_efault(struct kvm_vm *vm,
+ struct kvm_nested_state *state)
+{
+ test_nested_state_expect_errno(vm, state, EFAULT);
+}
+
+void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
+ u32 vmcs12_revision)
+{
+ /* Set revision_id in vmcs12 to vmcs12_revision. */
+ *(u32 *)(state->data) = vmcs12_revision;
+}
+
+void set_default_state(struct kvm_nested_state *state)
+{
+ memset(state, 0, sizeof(*state));
+ state->flags = KVM_STATE_NESTED_RUN_PENDING |
+ KVM_STATE_NESTED_GUEST_MODE;
+ state->format = 0;
+ state->size = sizeof(*state);
+}
+
+void set_default_vmx_state(struct kvm_nested_state *state, int size)
+{
+ memset(state, 0, size);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE |
+ KVM_STATE_NESTED_RUN_PENDING |
+ KVM_STATE_NESTED_EVMCS;
+ state->format = 0;
+ state->size = size;
+ state->vmx.vmxon_pa = 0x1000;
+ state->vmx.vmcs_pa = 0x2000;
+ state->vmx.smm.flags = 0;
+ set_revision_id_for_vmcs12(state, VMCS12_REVISION);
+}
+
+void test_vmx_nested_state(struct kvm_vm *vm)
+{
+ /* Add a page for VMCS12. */
+ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+ struct kvm_nested_state *state =
+ (struct kvm_nested_state *)malloc(state_sz);
+
+ /* The format must be set to 0. 0 for VMX, 1 for SVM. */
+ set_default_vmx_state(state, state_sz);
+ state->format = 1;
+ test_nested_state_expect_einval(vm, state);
+
+ /*
+ * We cannot virtualize anything if the guest does not have VMX
+ * enabled.
+ */
+ set_default_vmx_state(state, state_sz);
+ test_nested_state_expect_einval(vm, state);
+
+ /*
+ * We cannot virtualize anything if the guest does not have VMX
+ * enabled. We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
+ * is set to -1ull.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = -1ull;
+ test_nested_state(vm, state);
+
+ /* Enable VMX in the guest CPUID. */
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = -1ull;
+ state->vmx.smm.flags = 1;
+ test_nested_state_expect_einval(vm, state);
+
+ /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = -1ull;
+ state->vmx.vmcs_pa = 0;
+ test_nested_state_expect_einval(vm, state);
+
+ /*
+ * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
+ * setting the nested state.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = -1ull;
+ state->vmx.vmcs_pa = -1ull;
+ test_nested_state(vm, state);
+
+ /* It is invalid to have vmxon_pa set to a non-page aligned address. */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = 1;
+ test_nested_state_expect_einval(vm, state);
+
+ /*
+ * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
+ * KVM_STATE_NESTED_GUEST_MODE set together.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE |
+ KVM_STATE_NESTED_RUN_PENDING;
+ state->vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+ test_nested_state_expect_einval(vm, state);
+
+ /*
+ * It is invalid to have any of the SMM flags set besides:
+ * KVM_STATE_NESTED_SMM_GUEST_MODE
+ * KVM_STATE_NESTED_SMM_VMXON
+ */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
+ KVM_STATE_NESTED_SMM_VMXON);
+ test_nested_state_expect_einval(vm, state);
+
+ /* Outside SMM, SMM flags must be zero. */
+ set_default_vmx_state(state, state_sz);
+ state->flags = 0;
+ state->vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+ test_nested_state_expect_einval(vm, state);
+
+ /* Size must be large enough to fit kvm_nested_state and vmcs12. */
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ test_nested_state(vm, state);
+
+ /* vmxon_pa cannot be the same address as vmcs_pa. */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = 0;
+ state->vmx.vmcs_pa = 0;
+ test_nested_state_expect_einval(vm, state);
+
+ /* The revision id for vmcs12 must be VMCS12_REVISION. */
+ set_default_vmx_state(state, state_sz);
+ set_revision_id_for_vmcs12(state, 0);
+ test_nested_state_expect_einval(vm, state);
+
+ /*
+ * Test that if we leave nesting the state reflects that when we get
+ * it again.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->vmx.vmxon_pa = -1ull;
+ state->vmx.vmcs_pa = -1ull;
+ state->flags = 0;
+ test_nested_state(vm, state);
+ vcpu_nested_state_get(vm, VCPU_ID, state);
+ TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+ "Size must be between %d and %d. The size returned was %d.",
+ sizeof(*state), state_sz, state->size);
+ TEST_ASSERT(state->vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
+ TEST_ASSERT(state->vmx.vmcs_pa == -1ull, "vmcs_pa must be -1ull.");
+
+ free(state);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_nested_state state;
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ printf("KVM_CAP_NESTED_STATE not available, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ /*
+ * AMD currently does not implement set_nested_state, so for now we
+ * just early out.
+ */
+ if (!(entry->ecx & CPUID_VMX)) {
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, 0);
+
+ /* Passing a NULL kvm_nested_state causes a EFAULT. */
+ test_nested_state_expect_efault(vm, NULL);
+
+ /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
+ set_default_state(&state);
+ state.size = 0;
+ test_nested_state_expect_einval(vm, &state);
+
+ /*
+ * Setting the flags 0xf fails the flags check. The only flags that
+ * can be used are:
+ * KVM_STATE_NESTED_GUEST_MODE
+ * KVM_STATE_NESTED_RUN_PENDING
+ * KVM_STATE_NESTED_EVMCS
+ */
+ set_default_state(&state);
+ state.flags = 0xf;
+ test_nested_state_expect_einval(vm, &state);
+
+ /*
+ * If KVM_STATE_NESTED_RUN_PENDING is set then
+ * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
+ */
+ set_default_state(&state);
+ state.flags = KVM_STATE_NESTED_RUN_PENDING;
+ test_nested_state_expect_einval(vm, &state);
+
+ /*
+ * TODO: When SVM support is added for KVM_SET_NESTED_STATE
+ * add tests here to support it like VMX.
+ */
+ if (entry->ecx & CPUID_VMX)
+ test_vmx_nested_state(vm);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 5970cee6d05f..b074ea9b6fe8 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -286,5 +286,30 @@
"teardown": [
"$TC action flush action bpf"
]
+ },
+ {
+ "id": "b8a1",
+ "name": "Replace bpf action with invalid goto_chain control",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '1,6 0 0 4294967295' pass index 90"
+ ],
+ "cmdUnderTest": "$TC action replace action bpf bytecode '1,6 0 0 4294967295' goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf.* default-action pass.*index 90",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
index 13147a1f5731..cadde8f41fcd 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -287,5 +287,30 @@
"teardown": [
"$TC actions flush action connmark"
]
+ },
+ {
+ "id": "c506",
+ "name": "Replace connmark with invalid goto chain control",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action connmark pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action connmark goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action connmark index 90",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index a022792d392a..ddabb2fbb7c7 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -500,5 +500,30 @@
"matchPattern": "^[ \t]+index [0-9]+ ref",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "d128",
+ "name": "Replace csum action with invalid goto chain control",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action csum iph index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action csum iph goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action csum index 90",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index 89189a03ce3d..814b7a8a478b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -560,5 +560,30 @@
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "ca89",
+ "name": "Replace gact action with invalid goto chain control",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pass random determ drop 2 index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action goto chain 42 random determ drop 5 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*random type determ drop val 2.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index 0da3545cabdb..c13a68b98fc7 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1060,5 +1060,30 @@
"matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "a0e2",
+ "name": "Replace ife encode action with invalid goto chain control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode allow mark pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action ife encode allow mark goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E .*allow mark.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index db49fd0f8445..6e5fb3d25681 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -434,5 +434,30 @@
"teardown": [
"$TC actions flush action mirred"
]
+ },
+ {
+ "id": "2a9a",
+ "name": "Replace mirred action with invalid goto chain control",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred ingress mirror dev lo drop index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred ingress mirror dev lo goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action mirred index 90",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
index 0080dc2fd41c..bc12c1ccad30 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -589,5 +589,30 @@
"teardown": [
"$TC actions flush action nat"
]
+ },
+ {
+ "id": "4b12",
+ "name": "Replace nat action with invalid goto chain control",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action nat ingress 1.18.1.1 1.18.2.2 goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 90",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
new file mode 100644
index 000000000000..b73ceb9e28b1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -0,0 +1,51 @@
+[
+ {
+ "id": "319a",
+ "name": "Add pedit action that mangles IP TTL",
+ "category": [
+ "actions",
+ "pedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip ttl set 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 1 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "7e67",
+ "name": "Replace pedit action with invalid goto chain",
+ "category": [
+ "actions",
+ "pedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pedit ex munge ip ttl set 10 pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action pedit ex munge ip ttl set 10 goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action pedit",
+ "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 90 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 4086a50a670e..b8268da5adaa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -739,5 +739,30 @@
"teardown": [
"$TC actions flush action police"
]
+ },
+ {
+ "id": "689e",
+ "name": "Replace police action with invalid goto chain control",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 3mbit burst 250k drop index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action police rate 3mbit burst 250k goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action police index 90",
+ "matchPattern": "action order [0-9]*: police 0x5a rate 3Mbit burst 250Kb mtu 2Kb action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
index 3aca33c00039..ddabb160a11b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -144,6 +144,30 @@
]
},
{
+ "id": "7571",
+ "name": "Add sample action with invalid rate",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
"id": "b6d4",
"name": "Add sample action with mandatory arguments and invalid control action",
"category": [
@@ -584,5 +608,30 @@
"teardown": [
"$TC actions flush action sample"
]
+ },
+ {
+ "id": "0a6e",
+ "name": "Replace sample action with invalid goto chain control",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pass.*index 90",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index e89a7aa4012d..8e8c1ae12260 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -126,5 +126,30 @@
"teardown": [
""
]
+ },
+ {
+ "id": "b776",
+ "name": "Replace simple action with invalid goto chain control",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"hello\" pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action simple sdata \"world\" goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <hello>.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index 5aaf593b914a..ecd96eda7f6a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -484,5 +484,30 @@
"teardown": [
"$TC actions flush action skbedit"
]
+ },
+ {
+ "id": "1b2b",
+ "name": "Replace skbedit action with invalid goto_chain control",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit ptype host pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action skbedit ptype host goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype host pass.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index fe3326e939c1..6eb4c4f97060 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -392,5 +392,30 @@
"teardown": [
"$TC actions flush action skbmod"
]
+ },
+ {
+ "id": "b651",
+ "name": "Replace skbmod action with invalid goto_chain control",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x1111 pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action skbmod set etype 0x1111 goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pass set etype 0x1111\\s+index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
index e7e15a7336b6..28453a445fdb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -884,5 +884,30 @@
"teardown": [
"$TC actions flush action tunnel_key"
]
+ },
+ {
+ "id": "8242",
+ "name": "Replace tunnel_key set action with invalid goto chain",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 10.10.10.2 dst_ip 20.20.20.1 dst_port 3129 id 2 csum goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 90",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*csum pass.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 69ea09eefffc..cc7c7d758008 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -688,5 +688,30 @@
"teardown": [
"$TC actions flush action vlan"
]
+ },
+ {
+ "id": "e394",
+ "name": "Replace vlan push action with invalid goto chain control",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 500 pass index 90"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 500 goto chain 42 index 90 cookie c1a0c1a0",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action vlan index 90",
+ "matchPattern": "action order [0-9]+: vlan.*push id 500 protocol 802.1Q priority 0 pass.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 99a5ffca1088..2d096b2abf2c 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -19,6 +19,26 @@
]
},
{
+ "id": "2638",
+ "name": "Add matchall and try to get it",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
+ ],
+ "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
+ ]
+ },
+ {
"id": "d052",
"name": "Add 1M filters with the same action",
"category": [
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index 40ea95ce2ead..828c18584624 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -22,6 +22,7 @@ TPM2_CC_UNSEAL = 0x015E
TPM2_CC_FLUSH_CONTEXT = 0x0165
TPM2_CC_START_AUTH_SESSION = 0x0176
TPM2_CC_GET_CAPABILITY = 0x017A
+TPM2_CC_GET_RANDOM = 0x017B
TPM2_CC_PCR_READ = 0x017E
TPM2_CC_POLICY_PCR = 0x017F
TPM2_CC_PCR_EXTEND = 0x0182
@@ -357,9 +358,9 @@ class Client:
self.flags = flags
if (self.flags & Client.FLAG_SPACE) == 0:
- self.tpm = open('/dev/tpm0', 'r+b')
+ self.tpm = open('/dev/tpm0', 'r+b', buffering=0)
else:
- self.tpm = open('/dev/tpmrm0', 'r+b')
+ self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0)
def close(self):
self.tpm.close()
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 3bb066fea4a0..d4973be53493 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -158,6 +158,69 @@ class SmokeTest(unittest.TestCase):
pass
self.assertEqual(rejected, True)
+ def test_read_partial_resp(self):
+ try:
+ fmt = '>HIIH'
+ cmd = struct.pack(fmt,
+ tpm2.TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ tpm2.TPM2_CC_GET_RANDOM,
+ 0x20)
+ self.client.tpm.write(cmd)
+ hdr = self.client.tpm.read(10)
+ sz = struct.unpack('>I', hdr[2:6])[0]
+ rsp = self.client.tpm.read()
+ except:
+ pass
+ self.assertEqual(sz, 10 + 2 + 32)
+ self.assertEqual(len(rsp), 2 + 32)
+
+ def test_read_partial_overwrite(self):
+ try:
+ fmt = '>HIIH'
+ cmd = struct.pack(fmt,
+ tpm2.TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ tpm2.TPM2_CC_GET_RANDOM,
+ 0x20)
+ self.client.tpm.write(cmd)
+ # Read part of the respone
+ rsp1 = self.client.tpm.read(15)
+
+ # Send a new cmd
+ self.client.tpm.write(cmd)
+
+ # Read the whole respone
+ rsp2 = self.client.tpm.read()
+ except:
+ pass
+ self.assertEqual(len(rsp1), 15)
+ self.assertEqual(len(rsp2), 10 + 2 + 32)
+
+ def test_send_two_cmds(self):
+ rejected = False
+ try:
+ fmt = '>HIIH'
+ cmd = struct.pack(fmt,
+ tpm2.TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ tpm2.TPM2_CC_GET_RANDOM,
+ 0x20)
+ self.client.tpm.write(cmd)
+
+ # expect the second one to raise -EBUSY error
+ self.client.tpm.write(cmd)
+ rsp = self.client.tpm.read()
+
+ except IOError, e:
+ # read the response
+ rsp = self.client.tpm.read()
+ rejected = True
+ pass
+ except:
+ pass
+ self.assertEqual(rejected, True)
+
class SpaceTest(unittest.TestCase):
def setUp(self):
logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index ea434ddc8499..aad9284c043a 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -57,3 +57,6 @@ config HAVE_KVM_VCPU_ASYNC_IOCTL
config HAVE_KVM_VCPU_RUN_PID_CHANGE
bool
+
+config HAVE_KVM_NO_POLL
+ bool
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 8b7ca101f0f7..e5312f47d8e1 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -224,9 +224,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_MSI_DEVID:
if (!kvm)
r = -EINVAL;
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 264d92da3240..370bd6c5e6cb 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -222,7 +222,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
}
}
- if (used_lrs) {
+ if (used_lrs || cpu_if->its_vpe.its_vm) {
int i;
u32 elrsr;
@@ -247,7 +247,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
int i;
- if (used_lrs) {
+ if (used_lrs || cpu_if->its_vpe.its_vm) {
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ffd7acdceac7..27c958306449 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -102,8 +102,7 @@ static bool kvm_is_device_pfn(unsigned long pfn)
* @addr: IPA
* @pmd: pmd pointer for IPA
*
- * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
- * pages in the range dirty.
+ * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs.
*/
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
@@ -121,8 +120,7 @@ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
* @addr: IPA
* @pud: pud pointer for IPA
*
- * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. Marks all
- * pages in the range dirty.
+ * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs.
*/
static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
{
@@ -899,9 +897,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
*
- * Allocates only the stage-2 HW PGD level table(s) (can support either full
- * 40-bit input addresses or limited to 32-bit input addresses). Clears the
- * allocated pages.
+ * Allocates only the stage-2 HW PGD level table(s) of size defined by
+ * stage2_pgd_size(kvm).
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
@@ -1067,25 +1064,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
{
pmd_t *pmd, old_pmd;
+retry:
pmd = stage2_get_pmd(kvm, cache, addr);
VM_BUG_ON(!pmd);
old_pmd = *pmd;
+ /*
+ * Multiple vcpus faulting on the same PMD entry, can
+ * lead to them sequentially updating the PMD with the
+ * same value. Following the break-before-make
+ * (pmd_clear() followed by tlb_flush()) process can
+ * hinder forward progress due to refaults generated
+ * on missing translations.
+ *
+ * Skip updating the page table if the entry is
+ * unchanged.
+ */
+ if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+ return 0;
+
if (pmd_present(old_pmd)) {
/*
- * Multiple vcpus faulting on the same PMD entry, can
- * lead to them sequentially updating the PMD with the
- * same value. Following the break-before-make
- * (pmd_clear() followed by tlb_flush()) process can
- * hinder forward progress due to refaults generated
- * on missing translations.
+ * If we already have PTE level mapping for this block,
+ * we must unmap it to avoid inconsistent TLB state and
+ * leaking the table page. We could end up in this situation
+ * if the memory slot was marked for dirty logging and was
+ * reverted, leaving PTE level mappings for the pages accessed
+ * during the period. So, unmap the PTE level mapping for this
+ * block and retry, as we could have released the upper level
+ * table in the process.
*
- * Skip updating the page table if the entry is
- * unchanged.
+ * Normal THP split/merge follows mmu_notifier callbacks and do
+ * get handled accordingly.
*/
- if (pmd_val(old_pmd) == pmd_val(*new_pmd))
- return 0;
-
+ if (!pmd_thp_or_huge(old_pmd)) {
+ unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
+ goto retry;
+ }
/*
* Mapping in huge pages should only happen through a
* fault. If a page is merged into a transparent huge
@@ -1097,8 +1112,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
* should become splitting first, unmapped, merged,
* and mapped back in on-demand.
*/
- VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
-
+ WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
@@ -1114,6 +1128,7 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
{
pud_t *pudp, old_pud;
+retry:
pudp = stage2_get_pud(kvm, cache, addr);
VM_BUG_ON(!pudp);
@@ -1121,14 +1136,23 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
/*
* A large number of vcpus faulting on the same stage 2 entry,
- * can lead to a refault due to the
- * stage2_pud_clear()/tlb_flush(). Skip updating the page
- * tables if there is no change.
+ * can lead to a refault due to the stage2_pud_clear()/tlb_flush().
+ * Skip updating the page tables if there is no change.
*/
if (pud_val(old_pud) == pud_val(*new_pudp))
return 0;
if (stage2_pud_present(kvm, old_pud)) {
+ /*
+ * If we already have table level mapping for this block, unmap
+ * the range for this block and retry.
+ */
+ if (!stage2_pud_huge(kvm, old_pud)) {
+ unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
+ goto retry;
+ }
+
+ WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
stage2_pud_clear(kvm, pudp);
kvm_tlb_flush_vmid_ipa(kvm, addr);
} else {
@@ -1451,13 +1475,11 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
}
/**
- * stage2_wp_puds - write protect PGD range
- * @pgd: pointer to pgd entry
- * @addr: range start address
- * @end: range end address
- *
- * Process PUD entries, for a huge PUD we cause a panic.
- */
+ * stage2_wp_puds - write protect PGD range
+ * @pgd: pointer to pgd entry
+ * @addr: range start address
+ * @end: range end address
+ */
static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
@@ -1594,8 +1616,9 @@ static void kvm_send_hwpoison_signal(unsigned long address,
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}
-static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
- unsigned long hva)
+static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
+ unsigned long hva,
+ unsigned long map_size)
{
gpa_t gpa_start;
hva_t uaddr_start, uaddr_end;
@@ -1610,34 +1633,34 @@ static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
/*
* Pages belonging to memslots that don't have the same alignment
- * within a PMD for userspace and IPA cannot be mapped with stage-2
- * PMD entries, because we'll end up mapping the wrong pages.
+ * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
+ * PMD/PUD entries, because we'll end up mapping the wrong pages.
*
* Consider a layout like the following:
*
* memslot->userspace_addr:
* +-----+--------------------+--------------------+---+
- * |abcde|fgh Stage-1 PMD | Stage-1 PMD tv|xyz|
+ * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
* +-----+--------------------+--------------------+---+
*
* memslot->base_gfn << PAGE_SIZE:
* +---+--------------------+--------------------+-----+
- * |abc|def Stage-2 PMD | Stage-2 PMD |tvxyz|
+ * |abc|def Stage-2 block | Stage-2 block |tvxyz|
* +---+--------------------+--------------------+-----+
*
- * If we create those stage-2 PMDs, we'll end up with this incorrect
+ * If we create those stage-2 blocks, we'll end up with this incorrect
* mapping:
* d -> f
* e -> g
* f -> h
*/
- if ((gpa_start & ~S2_PMD_MASK) != (uaddr_start & ~S2_PMD_MASK))
+ if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
return false;
/*
* Next, let's make sure we're not trying to map anything not covered
- * by the memslot. This means we have to prohibit PMD size mappings
- * for the beginning and end of a non-PMD aligned and non-PMD sized
+ * by the memslot. This means we have to prohibit block size mappings
+ * for the beginning and end of a non-block aligned and non-block sized
* memory slot (illustrated by the head and tail parts of the
* userspace view above containing pages 'abcde' and 'xyz',
* respectively).
@@ -1646,8 +1669,8 @@ static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot,
* userspace_addr or the base_gfn, as both are equally aligned (per
* the check above) and equally sized.
*/
- return (hva & S2_PMD_MASK) >= uaddr_start &&
- (hva & S2_PMD_MASK) + S2_PMD_SIZE <= uaddr_end;
+ return (hva & ~(map_size - 1)) >= uaddr_start &&
+ (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1676,12 +1699,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (!fault_supports_stage2_pmd_mappings(memslot, hva))
- force_pte = true;
-
- if (logging_active)
- force_pte = true;
-
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
@@ -1692,6 +1709,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
vma_pagesize = vma_kernel_pagesize(vma);
+ if (logging_active ||
+ !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
+ force_pte = true;
+ vma_pagesize = PAGE_SIZE;
+ }
+
/*
* The stage2 has a minimum of 2 level table (For arm64 see
* kvm_arm_setup_stage2()). Hence, we are guaranteed that we can
@@ -1699,11 +1722,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* As for PUD huge maps, we must make sure that we have at least
* 3 levels, i.e, PMD is not folded.
*/
- if ((vma_pagesize == PMD_SIZE ||
- (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) &&
- !force_pte) {
+ if (vma_pagesize == PMD_SIZE ||
+ (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm)))
gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
- }
up_read(&current->mm->mmap_sem);
/* We need minimum second+third level pages */
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index ab3f47745d9c..44ceaccb18cf 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -754,8 +754,9 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
int esz = GITS_BASER_ENTRY_SIZE(baser);
- int index;
+ int index, idx;
gfn_t gfn;
+ bool ret;
switch (type) {
case GITS_BASER_TYPE_DEVICE:
@@ -782,7 +783,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
if (eaddr)
*eaddr = addr;
- return kvm_is_visible_gfn(its->dev->kvm, gfn);
+
+ goto out;
}
/* calculate and check the index into the 1st level */
@@ -812,7 +814,12 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
if (eaddr)
*eaddr = indirect_ptr;
- return kvm_is_visible_gfn(its->dev->kvm, gfn);
+
+out:
+ idx = srcu_read_lock(&its->dev->kvm->srcu);
+ ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
+ srcu_read_unlock(&its->dev->kvm->srcu, idx);
+ return ret;
}
static int vgic_its_alloc_collection(struct vgic_its *its,
@@ -1729,8 +1736,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
kfree(its);
}
-int vgic_its_has_attr_regs(struct kvm_device *dev,
- struct kvm_device_attr *attr)
+static int vgic_its_has_attr_regs(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
{
const struct vgic_register_region *region;
gpa_t offset = attr->attr;
@@ -1750,9 +1757,9 @@ int vgic_its_has_attr_regs(struct kvm_device *dev,
return 0;
}
-int vgic_its_attr_regs_access(struct kvm_device *dev,
- struct kvm_device_attr *attr,
- u64 *reg, bool is_write)
+static int vgic_its_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u64 *reg, bool is_write)
{
const struct vgic_register_region *region;
struct vgic_its *its;
@@ -1919,7 +1926,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) |
ite->collection->collection_id;
val = cpu_to_le64(val);
- return kvm_write_guest(kvm, gpa, &val, ite_esz);
+ return kvm_write_guest_lock(kvm, gpa, &val, ite_esz);
}
/**
@@ -2066,7 +2073,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
(itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
(dev->num_eventid_bits - 1));
val = cpu_to_le64(val);
- return kvm_write_guest(kvm, ptr, &val, dte_esz);
+ return kvm_write_guest_lock(kvm, ptr, &val, dte_esz);
}
/**
@@ -2246,7 +2253,7 @@ static int vgic_its_save_cte(struct vgic_its *its,
((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
collection->collection_id);
val = cpu_to_le64(val);
- return kvm_write_guest(its->dev->kvm, gpa, &val, esz);
+ return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
}
static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
@@ -2317,7 +2324,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
*/
val = 0;
BUG_ON(cte_esz > sizeof(val));
- ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz);
+ ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz);
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 408a78eb6a97..9f87e58dbd4a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -358,7 +358,7 @@ retry:
if (status) {
/* clear consumed data */
val &= ~(1 << bit_nr);
- ret = kvm_write_guest(kvm, ptr, &val, 1);
+ ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
}
@@ -409,7 +409,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
else
val &= ~(1 << bit_nr);
- ret = kvm_write_guest(kvm, ptr, &val, 1);
+ ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index abd9c7352677..3af69f2a3866 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -867,15 +867,21 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* either observe the new interrupt before or after doing this check,
* and introducing additional synchronization mechanism doesn't change
* this.
+ *
+ * Note that we still need to go through the whole thing if anything
+ * can be directly injected (GICv4).
*/
- if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
+ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
+ !vgic_supports_direct_msis(vcpu->kvm))
return;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
- raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
- vgic_flush_lr_state(vcpu);
- raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
+ raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ vgic_flush_lr_state(vcpu);
+ raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ }
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 4325250afd72..001aeda4c154 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -214,9 +214,9 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
if (flags & EPOLLHUP) {
/* The eventfd is closing, detach from KVM */
- unsigned long flags;
+ unsigned long iflags;
- spin_lock_irqsave(&kvm->irqfds.lock, flags);
+ spin_lock_irqsave(&kvm->irqfds.lock, iflags);
/*
* We must check if someone deactivated the irqfd before
@@ -230,7 +230,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
if (irqfd_is_active(irqfd))
irqfd_deactivate(irqfd);
- spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
+ spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
}
return 0;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3547b0d8c91e..79e59e4fa3dc 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm,
{
struct kvm_kernel_irq_routing_entry *ei;
int r;
+ u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and non-irqchip routing.
*/
- hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+ hlist_for_each_entry(ei, &rt->map[gsi], link)
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return -EINVAL;
- e->gsi = ue->gsi;
+ e->gsi = gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f25aa98a94df..d22b1f4bfa56 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1134,11 +1134,11 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
/**
- * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
+ * kvm_get_dirty_log_protect - get a snapshot of dirty pages
* and reenable dirty page tracking for the corresponding pages.
* @kvm: pointer to kvm instance
* @log: slot id and address to which we copy the log
- * @is_dirty: flag set if any page is dirty
+ * @flush: true if TLB flush is needed by caller
*
* We need to keep it in mind that VCPU threads can write to the bitmap
* concurrently. So, to avoid losing track of dirty pages we keep the
@@ -1223,6 +1223,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
* and reenable dirty page tracking for the corresponding pages.
* @kvm: pointer to kvm instance
* @log: slot id and address from which to fetch the bitmap of dirty pages
+ * @flush: true if TLB flush is needed by caller
*/
int kvm_clear_dirty_log_protect(struct kvm *kvm,
struct kvm_clear_dirty_log *log, bool *flush)
@@ -1240,7 +1241,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
return -EINVAL;
- if ((log->first_page & 63) || (log->num_pages & 63))
+ if (log->first_page & 63)
return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
@@ -1250,11 +1251,12 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
if (!dirty_bitmap)
return -ENOENT;
- n = kvm_dirty_bitmap_bytes(memslot);
+ n = ALIGN(log->num_pages, BITS_PER_LONG) / 8;
if (log->first_page > memslot->npages ||
- log->num_pages > memslot->npages - log->first_page)
- return -EINVAL;
+ log->num_pages > memslot->npages - log->first_page ||
+ (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63)))
+ return -EINVAL;
*flush = false;
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
@@ -1262,8 +1264,8 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
return -EFAULT;
spin_lock(&kvm->mmu_lock);
- for (offset = log->first_page,
- i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--;
+ for (offset = log->first_page, i = offset / BITS_PER_LONG,
+ n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--;
i++, offset += BITS_PER_LONG) {
unsigned long mask = *dirty_bitmap_buffer++;
atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
@@ -1740,6 +1742,70 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_page);
+static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct kvm_host_map *map)
+{
+ kvm_pfn_t pfn;
+ void *hva = NULL;
+ struct page *page = KVM_UNMAPPED_PAGE;
+
+ if (!map)
+ return -EINVAL;
+
+ pfn = gfn_to_pfn_memslot(slot, gfn);
+ if (is_error_noslot_pfn(pfn))
+ return -EINVAL;
+
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+ hva = kmap(page);
+ } else {
+ hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+ }
+
+ if (!hva)
+ return -EFAULT;
+
+ map->page = page;
+ map->hva = hva;
+ map->pfn = pfn;
+ map->gfn = gfn;
+
+ return 0;
+}
+
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
+{
+ return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_map);
+
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+ bool dirty)
+{
+ if (!map)
+ return;
+
+ if (!map->hva)
+ return;
+
+ if (map->page)
+ kunmap(map->page);
+ else
+ memunmap(map->hva);
+
+ if (dirty) {
+ kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
+ kvm_release_pfn_dirty(map->pfn);
+ } else {
+ kvm_release_pfn_clean(map->pfn);
+ }
+
+ map->hva = NULL;
+ map->page = NULL;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
+
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
kvm_pfn_t pfn;
@@ -2253,7 +2319,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
u64 block_ns;
start = cur = ktime_get();
- if (vcpu->halt_poll_ns) {
+ if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
++vcpu->stat.halt_attempted_poll;
@@ -2884,6 +2950,16 @@ out:
}
#endif
+static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct kvm_device *dev = filp->private_data;
+
+ if (dev->ops->mmap)
+ return dev->ops->mmap(dev, vma);
+
+ return -ENODEV;
+}
+
static int kvm_device_ioctl_attr(struct kvm_device *dev,
int (*accessor)(struct kvm_device *dev,
struct kvm_device_attr *attr),
@@ -2905,6 +2981,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
{
struct kvm_device *dev = filp->private_data;
+ if (dev->kvm->mm != current->mm)
+ return -EIO;
+
switch (ioctl) {
case KVM_SET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
@@ -2925,6 +3004,13 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
struct kvm_device *dev = filp->private_data;
struct kvm *kvm = dev->kvm;
+ if (dev->ops->release) {
+ mutex_lock(&kvm->lock);
+ list_del(&dev->vm_node);
+ dev->ops->release(dev);
+ mutex_unlock(&kvm->lock);
+ }
+
kvm_put_kvm(kvm);
return 0;
}
@@ -2933,6 +3019,7 @@ static const struct file_operations kvm_device_fops = {
.unlocked_ioctl = kvm_device_ioctl,
.release = kvm_device_release,
KVM_COMPAT(kvm_device_ioctl),
+ .mmap = kvm_device_mmap,
};
struct kvm_device *kvm_device_from_filp(struct file *filp)
@@ -2974,12 +3061,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+ int type;
int ret;
if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENODEV;
- ops = kvm_device_ops_table[cd->type];
+ type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table));
+ ops = kvm_device_ops_table[type];
if (ops == NULL)
return -ENODEV;
@@ -2994,7 +3083,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
dev->kvm = kvm;
mutex_lock(&kvm->lock);
- ret = ops->create(dev, cd->type);
+ ret = ops->create(dev, type);
if (ret < 0) {
mutex_unlock(&kvm->lock);
kfree(dev);
@@ -3039,7 +3128,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_CHECK_EXTENSION_VM:
case KVM_CAP_ENABLE_CAP_VM:
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
- case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
#endif
return 1;
#ifdef CONFIG_KVM_MMIO
@@ -3058,6 +3147,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#endif
case KVM_CAP_MAX_VCPU_ID:
return KVM_MAX_VCPU_ID;
+ case KVM_CAP_NR_MEMSLOTS:
+ return KVM_USER_MEM_SLOTS;
default:
break;
}
@@ -3075,7 +3166,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
{
switch (cap->cap) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
- case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
if (cap->flags || (cap->args[0] & ~1))
return -EINVAL;
kvm->manual_dirty_log_protect = cap->args[0];